Skip to content

Commit

Permalink
fix styles
Browse files: browse the repository at this point in the history
  • Loading branch information
huyiwen committed May 24, 2024
1 parent 50bbbd5 commit 29e986b
Show file tree
Hide file tree
Showing 10 changed files with 46 additions and 31 deletions.
1 change: 1 addition & 0 deletions .github/workflows/isort-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ jobs:
with:
sort-paths: "utilization"
requirements-files: "requirements.txt requirements-openai.txt"
configuration: "--check-only --diff --settings-path .isort.cfg"
1 change: 1 addition & 0 deletions .isort.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
[settings]
line_length=120
multi_line_output=5
2 changes: 2 additions & 0 deletions tests/test_empty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def test_empty():
    """Placeholder test that always passes.

    NOTE(review): presumably exists so the test runner collects at least one
    test in an otherwise empty suite — confirm against the CI configuration.
    """
    assert True
6 changes: 5 additions & 1 deletion utilization/dataset/dataset_enum.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Identifiers of the WMT datasets; also merged into DEFAULT_VLLM_DATASETS below.
WMT_DATASETS = ["wmt10", "wmt13", "wmt14", "wmt15", "wmt16", "wmt17", "wmt18", "wmt19", "wmt21"]

# Set of dataset names: the literals listed here plus every entry of WMT_DATASETS.
# NOTE(review): presumably the datasets that default to the vLLM backend — confirm against callers.
# Assigned exactly once; an earlier one-line assignment with the same value was redundant.
DEFAULT_VLLM_DATASETS = {
    "alpaca_eval", "bbh", "cnn_dailymail", "color_objects", "coqa", "drop", "gaokao", "gsm8k", "halueval", "humaneval",
    "ifeval", "lambada", "math", "mbpp", "mt_bench", "nq", "quac", "real_toxicity_prompts", "squad", "squad_v2", "tldr",
    "triviaqa", "vicuna_bench", "webq", "xsum"
} | set(WMT_DATASETS)

DATASET_ALIASES = {
"agieval": ["agieval_single_choice", "agieval_cot"], # try to use MultipleChoiceDataset first
Expand Down
42 changes: 26 additions & 16 deletions utilization/dataset/gpqa.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from functools import cached_property
import json
from functools import cached_property
from logging import getLogger
from typing import List, Optional, Tuple, Union
from pathlib import Path

from typing import List, Optional, Tuple, Union

from .multiple_choice_dataset import MultipleChoiceDataset

Expand All @@ -12,14 +11,14 @@
FIVE_SHOT_INSTRUCTION = ""

# Prompt templates keyed by prompting mode. Each value is a template string using
# `{{ ... }}` placeholders (looks like Jinja2 syntax — TODO confirm the renderer)
# that reference `question`, `options`, `example_idx` and, for "retrieval", `bing`.
# Every key appears exactly once: a dict literal silently keeps only the last of
# any repeated key, so duplicated entries would be dead text.
INSTRUCTIONS = {
    "five_shot":
    """{{ 'Here are some example questions from experts. Answer the final question yourself, following the format of the previous questions exactly.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('When you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)".') }}""",
    "zero_shot":
    """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\n\nFormat your response as follows: "The correct answer is (insert answer here)".""",
    "five_shot_cot":
    """{{ 'Here are some example questions from experts. An explanation is given before the final answer. Answer the final question yourself, giving your reasoning beforehand.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('Give step by step reasoning before you answer, and when you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)":\n') }}""",
    "zero_shot_cot":
    """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\nLet's think step by step: __SEPARATOR__\n\nBased on the above, what is the single, most likely answer choice? Answer in the format "The correct answer is (insert answer here)".""",
    "retrieval": "{{ bing }}\n\nQuestion: {{ question }}{{ '\n\nChoices:\n' + options if options }}"
}

Expand All @@ -38,7 +37,9 @@ class GPQA(MultipleChoiceDataset):
def init_arguments(self):

if self.sample_num > 1 and (self.max_num_shots == 0 or self.cot is None):
logger.warning("Self-consistency is only available for 5-shot CoT. Automatically set max_num_shots to 5 and cot to base.")
logger.warning(
"Self-consistency is only available for 5-shot CoT. Automatically set max_num_shots to 5 and cot to base."
)
self.max_num_shots = 5
self.cot = "base"

Expand Down Expand Up @@ -85,7 +86,9 @@ def read_file(base_path, search_path):
else:
return open(examples_json, "r").read()

def load_raw_dataset(self, dataset_path: Optional[str], subset_name: Optional[str], evaluation_set: str, example_set: Optional[str]):
def load_raw_dataset(
self, dataset_path: Optional[str], subset_name: Optional[str], evaluation_set: str, example_set: Optional[str]
):

super().load_raw_dataset(dataset_path, subset_name, evaluation_set, None)

Expand All @@ -98,7 +101,9 @@ def load_raw_dataset(self, dataset_path: Optional[str], subset_name: Optional[st
SEARCH_PATH = ["prompts", "chain_of_thought_examples.json"]
data = self.read_file(example_set_path, SEARCH_PATH)
if data is None:
raise FileNotFoundError("Example set not found. Please provide the correct path to the example set through `--example_set path/to/prompts`.")
raise FileNotFoundError(
"Example set not found. Please provide the correct path to the example set through `--example_set path/to/prompts`."
)
self.example_data = json.loads(data)["questions"]

elif self.max_num_shots == 0 and self.cot in ["retrieval", "retrieval_content"]:
Expand All @@ -108,7 +113,9 @@ def load_raw_dataset(self, dataset_path: Optional[str], subset_name: Optional[st
SEARCH_PATH = ["prompts", "bing-search-cot-content.txt"]
data = self.read_file(example_set_path, SEARCH_PATH)
if data is None:
raise FileNotFoundError("Chain-of-thought propmts for open-book not found. Please provide the correct path to the example set through `--example_set path/to/prompts`.")
raise FileNotFoundError(
"Chain-of-thought propmts for open-book not found. Please provide the correct path to the example set through `--example_set path/to/prompts`."
)
self.bing_data = data

def format_instance(self, instance):
Expand All @@ -120,7 +127,10 @@ def format_instance(self, instance):
else:
# https://huggingface.co/datasets/Idavidrein/gpqa
instance["question"] = instance["Question"]
instance["options"] = [instance["Incorrect Answer 1"], instance["Incorrect Answer 2"], instance["Incorrect Answer 3"], instance["Correct Answer"]]
instance["options"] = [
instance["Incorrect Answer 1"], instance["Incorrect Answer 2"], instance["Incorrect Answer 3"],
instance["Correct Answer"]
]
order = self.shuffle_options(instance["options"])
instance["target_idx"] = order.index(3)
return instance
Expand Down
4 changes: 3 additions & 1 deletion utilization/dataset/multiple_choice_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ def post_processing(self, predictions: Union[List[Tuple[float, int]], List[List[
for m in self.metrics:
if isinstance(m, Accuracy):
m.tag = "Norm"
normalized_predictions = np.array([rc[0] - ra[0] for rc, ra in zip(predictions[::2], predictions[1::2])])
normalized_predictions = np.array([
rc[0] - ra[0] for rc, ra in zip(predictions[::2], predictions[1::2])
])
else:
normalized_predictions = np.array([
result / length if length > 0 else LARGE_POSITIVE for result, length in predictions
Expand Down
4 changes: 2 additions & 2 deletions utilization/dataset/tydiqa.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
from functools import cached_property

import re
from ..metric import F1, Em
from ..metric import F1
from .generation_dataset import GenerationDataset


Expand Down
11 changes: 4 additions & 7 deletions utilization/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ class DatasetUtilMixin:

answer_prompt: str = "Answer:"

def set_tokenizer(self, tokenizer: Union[tiktoken.Encoding, PreTrainedTokenizer, PreTrainedTokenizerFast, None]) -> None:
def set_tokenizer(
self, tokenizer: Union[tiktoken.Encoding, PreTrainedTokenizer, PreTrainedTokenizerFast, None]
) -> None:
self.tokenizer = tokenizer
if isinstance(tokenizer, tiktoken.Encoding):
# Encoding.encode_ordinary is slightly faster than Encoding.encode
Expand Down Expand Up @@ -234,12 +236,7 @@ def load_fn(split):

def load_fn(split):
return ds.load_dataset(
_path,
subset_name,
split=split,
cache_dir=dataset_path,
trust_remote_code=True,
**load_kwargs
_path, subset_name, split=split, cache_dir=dataset_path, trust_remote_code=True, **load_kwargs
)
elif offline:

Expand Down
2 changes: 1 addition & 1 deletion utilization/model/anthropic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class Anthropic(ApiModel):
anthropic.APIConnectionError, anthropic.AuthenticationError, anthropic.BadRequestError, anthropic.ConflictError,
anthropic.NotFoundError, anthropic.PermissionDeniedError, anthropic.UnprocessableEntityError
)
_skip_errors = (anthropic.InternalServerError, )
_skip_errors = (anthropic.InternalServerError,)

_repr = ["model_type", "model_backend", "multi_turn"]

Expand Down
4 changes: 1 addition & 3 deletions utilization/model/huggingface_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,9 +689,7 @@ def _generation(
if isinstance(batched_inputs[0], Conversation):
# save the original conversation for chat-based model
batched_conversations = batched_inputs
batched_inputs_nofilter = [
conv.to_model_prompt(max_turns=max_turns) for conv in batched_conversations
]
batched_inputs_nofilter = [conv.to_model_prompt(max_turns=max_turns) for conv in batched_conversations]
# deal with conversations with different number of turns
batched_inputs = [i for i in batched_inputs_nofilter if i is not None]

Expand Down

0 comments on commit 29e986b

Please sign in to comment.