From 29e986b59d1182a42b4cb081241ef11008e3823c Mon Sep 17 00:00:00 2001
From: huyiwen <1020030101@qq.com>
Date: Fri, 24 May 2024 15:41:26 +0800
Subject: [PATCH] fix styles

---
 .github/workflows/isort-check.yml      |  1 +
 .isort.cfg                             |  1 +
 tests/test_empty.py                    |  2 +
 utilization/dataset/dataset_enum.py    |  6 ++-
 utilization/dataset/gpqa.py            | 42 ++++++++++++-------
 .../dataset/multiple_choice_dataset.py |  4 +-
 utilization/dataset/tydiqa.py          |  4 +-
 utilization/dataset/utils.py           | 11 ++---
 utilization/model/anthropic_model.py   |  2 +-
 utilization/model/huggingface_model.py |  4 +-
 10 files changed, 46 insertions(+), 31 deletions(-)
 create mode 100644 tests/test_empty.py

diff --git a/.github/workflows/isort-check.yml b/.github/workflows/isort-check.yml
index 2f8b64d5..f2eb7e2a 100644
--- a/.github/workflows/isort-check.yml
+++ b/.github/workflows/isort-check.yml
@@ -11,3 +11,4 @@ jobs:
         with:
           sort-paths: "utilization"
           requirements-files: "requirements.txt requirements-openai.txt"
+          configuration: "--check-only --diff --settings-path .isort.cfg"
\ No newline at end of file
diff --git a/.isort.cfg b/.isort.cfg
index 8913d962..0a079c8d 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -1,2 +1,3 @@
 [settings]
 line_length=120
+multi_line_output=5
\ No newline at end of file
diff --git a/tests/test_empty.py b/tests/test_empty.py
new file mode 100644
index 00000000..80d2166c
--- /dev/null
+++ b/tests/test_empty.py
@@ -0,0 +1,2 @@
+def test_empty():
+    assert True
diff --git a/utilization/dataset/dataset_enum.py b/utilization/dataset/dataset_enum.py
index 1fe974fb..a68b97db 100644
--- a/utilization/dataset/dataset_enum.py
+++ b/utilization/dataset/dataset_enum.py
@@ -1,6 +1,10 @@
 WMT_DATASETS = ["wmt10", "wmt13", "wmt14", "wmt15", "wmt16", "wmt17", "wmt18", "wmt19", "wmt21"]
 
-DEFAULT_VLLM_DATASETS = {"alpaca_eval", "bbh", "cnn_dailymail", "color_objects", "coqa", "drop", "gaokao", "gsm8k", "halueval", "humaneval", "ifeval", "lambada", "math", "mbpp", "mt_bench", "nq", "quac", "real_toxicity_prompts", "squad", "squad_v2", "tldr", "triviaqa", "vicuna_bench", "webq", "xsum"} | set(WMT_DATASETS)
+DEFAULT_VLLM_DATASETS = {
+    "alpaca_eval", "bbh", "cnn_dailymail", "color_objects", "coqa", "drop", "gaokao", "gsm8k", "halueval", "humaneval",
+    "ifeval", "lambada", "math", "mbpp", "mt_bench", "nq", "quac", "real_toxicity_prompts", "squad", "squad_v2", "tldr",
+    "triviaqa", "vicuna_bench", "webq", "xsum"
+} | set(WMT_DATASETS)
 
 DATASET_ALIASES = {
     "agieval": ["agieval_single_choice", "agieval_cot"],  # try to use MultipleChoiceDataset first
diff --git a/utilization/dataset/gpqa.py b/utilization/dataset/gpqa.py
index b56a7063..b5195f68 100644
--- a/utilization/dataset/gpqa.py
+++ b/utilization/dataset/gpqa.py
@@ -1,9 +1,8 @@
-from functools import cached_property
 import json
+from functools import cached_property
 from logging import getLogger
-from typing import List, Optional, Tuple, Union
 from pathlib import Path
-
+from typing import List, Optional, Tuple, Union
 
 from .multiple_choice_dataset import MultipleChoiceDataset
 
@@ -12,14 +11,14 @@ FIVE_SHOT_INSTRUCTION = ""
 
 INSTRUCTIONS = {
-    "five_shot": """{{ 'Here are some example questions from experts. Answer the final question yourself, following the format of the previous questions exactly.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('When you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)".') }}""",
-
-    "zero_shot": """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\n\nFormat your response as follows: "The correct answer is (insert answer here)".""",
-
-    "five_shot_cot": """{{ 'Here are some example questions from experts. An explanation is given before the final answer. Answer the final question yourself, giving your reasoning beforehand.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('Give step by step reasoning before you answer, and when you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)":\n') }}""",
-
-    "zero_shot_cot": """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\nLet's think step by step: __SEPARATOR__\n\nBased on the above, what is the single, most likely answer choice? Answer in the format "The correct answer is (insert answer here)".""",
-
+    "five_shot":
+        """{{ 'Here are some example questions from experts. Answer the final question yourself, following the format of the previous questions exactly.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('When you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)".') }}""",
+    "zero_shot":
+        """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\n\nFormat your response as follows: "The correct answer is (insert answer here)".""",
+    "five_shot_cot":
+        """{{ 'Here are some example questions from experts. An explanation is given before the final answer. Answer the final question yourself, giving your reasoning beforehand.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('Give step by step reasoning before you answer, and when you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)":\n') }}""",
+    "zero_shot_cot":
+        """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\nLet's think step by step: __SEPARATOR__\n\nBased on the above, what is the single, most likely answer choice? Answer in the format "The correct answer is (insert answer here)".""",
     "retrieval": "{{ bing }}\n\nQuestion: {{ question }}{{ '\n\nChoices:\n' + options if options }}"
 }
 
@@ -38,7 +37,9 @@ class GPQA(MultipleChoiceDataset):
 
     def init_arguments(self):
         if self.sample_num > 1 and (self.max_num_shots == 0 or self.cot is None):
-            logger.warning("Self-consistency is only available for 5-shot CoT. Automatically set max_num_shots to 5 and cot to base.")
+            logger.warning(
+                "Self-consistency is only available for 5-shot CoT. Automatically set max_num_shots to 5 and cot to base."
+            )
             self.max_num_shots = 5
             self.cot = "base"
 
@@ -85,7 +86,9 @@ def read_file(base_path, search_path):
         else:
             return open(examples_json, "r").read()
 
-    def load_raw_dataset(self, dataset_path: Optional[str], subset_name: Optional[str], evaluation_set: str, example_set: Optional[str]):
+    def load_raw_dataset(
+        self, dataset_path: Optional[str], subset_name: Optional[str], evaluation_set: str, example_set: Optional[str]
+    ):
 
         super().load_raw_dataset(dataset_path, subset_name, evaluation_set, None)
 
@@ -98,7 +101,9 @@ def load_raw_dataset(self, dataset_path: Optional[st
                 SEARCH_PATH = ["prompts", "chain_of_thought_examples.json"]
                 data = self.read_file(example_set_path, SEARCH_PATH)
                 if data is None:
-                    raise FileNotFoundError("Example set not found. Please provide the correct path to the example set through `--example_set path/to/prompts`.")
+                    raise FileNotFoundError(
+                        "Example set not found. Please provide the correct path to the example set through `--example_set path/to/prompts`."
+                    )
                 self.example_data = json.loads(data)["questions"]
 
         elif self.max_num_shots == 0 and self.cot in ["retrieval", "retrieval_content"]:
@@ -108,7 +113,9 @@ def load_raw_dataset(self, dataset_path: Optional[st
                 SEARCH_PATH = ["prompts", "bing-search-cot-content.txt"]
                 data = self.read_file(example_set_path, SEARCH_PATH)
                 if data is None:
-                    raise FileNotFoundError("Chain-of-thought propmts for open-book not found. Please provide the correct path to the example set through `--example_set path/to/prompts`.")
+                    raise FileNotFoundError(
+                        "Chain-of-thought prompts for open-book not found. Please provide the correct path to the example set through `--example_set path/to/prompts`."
+                    )
                 self.bing_data = data
 
     def format_instance(self, instance):
@@ -120,7 +127,10 @@ def format_instance(self, instance):
         else:
             # https://huggingface.co/datasets/Idavidrein/gpqa
             instance["question"] = instance["Question"]
-            instance["options"] = [instance["Incorrect Answer 1"], instance["Incorrect Answer 2"], instance["Incorrect Answer 3"], instance["Correct Answer"]]
+            instance["options"] = [
+                instance["Incorrect Answer 1"], instance["Incorrect Answer 2"], instance["Incorrect Answer 3"],
+                instance["Correct Answer"]
+            ]
             order = self.shuffle_options(instance["options"])
             instance["target_idx"] = order.index(3)
         return instance
diff --git a/utilization/dataset/multiple_choice_dataset.py b/utilization/dataset/multiple_choice_dataset.py
index 8d768592..30bbf84e 100644
--- a/utilization/dataset/multiple_choice_dataset.py
+++ b/utilization/dataset/multiple_choice_dataset.py
@@ -97,7 +97,9 @@ def post_processing(self, predictions: Union[List[Tuple[float, int]], List[List[
             for m in self.metrics:
                 if isinstance(m, Accuracy):
                     m.tag = "Norm"
-            normalized_predictions = np.array([rc[0] - ra[0] for rc, ra in zip(predictions[::2], predictions[1::2])])
+            normalized_predictions = np.array([
+                rc[0] - ra[0] for rc, ra in zip(predictions[::2], predictions[1::2])
+            ])
         else:
             normalized_predictions = np.array([
                 result / length if length > 0 else LARGE_POSITIVE for result, length in predictions
diff --git a/utilization/dataset/tydiqa.py b/utilization/dataset/tydiqa.py
index 7f7ff0c3..70750edc 100644
--- a/utilization/dataset/tydiqa.py
+++ b/utilization/dataset/tydiqa.py
@@ -1,7 +1,7 @@
+import re
 from functools import cached_property
-import re
 
-from ..metric import F1, Em
+from ..metric import F1
 from .generation_dataset import GenerationDataset
 
diff --git a/utilization/dataset/utils.py b/utilization/dataset/utils.py
index 7b02f6f9..5c423a7e 100644
--- a/utilization/dataset/utils.py
+++ b/utilization/dataset/utils.py
@@ -26,7 +26,9 @@ class DatasetUtilMixin:
 
     answer_prompt: str = "Answer:"
 
-    def set_tokenizer(self, tokenizer: Union[tiktoken.Encoding, PreTrainedTokenizer, PreTrainedTokenizerFast, None]) -> None:
+    def set_tokenizer(
+        self, tokenizer: Union[tiktoken.Encoding, PreTrainedTokenizer, PreTrainedTokenizerFast, None]
+    ) -> None:
         self.tokenizer = tokenizer
         if isinstance(tokenizer, tiktoken.Encoding):
             # Encoding.encode_ordinary is slightly faster than Encoding.encode
@@ -234,12 +236,7 @@ def load_fn(split):
 
     def load_fn(split):
         return ds.load_dataset(
-            _path,
-            subset_name,
-            split=split,
-            cache_dir=dataset_path,
-            trust_remote_code=True,
-            **load_kwargs
+            _path, subset_name, split=split, cache_dir=dataset_path, trust_remote_code=True, **load_kwargs
         )
 
 elif offline:
diff --git a/utilization/model/anthropic_model.py b/utilization/model/anthropic_model.py
index 8ff049be..388c866b 100644
--- a/utilization/model/anthropic_model.py
+++ b/utilization/model/anthropic_model.py
@@ -26,7 +26,7 @@ class Anthropic(ApiModel):
         anthropic.APIConnectionError, anthropic.AuthenticationError, anthropic.BadRequestError, anthropic.ConflictError,
         anthropic.NotFoundError, anthropic.PermissionDeniedError, anthropic.UnprocessableEntityError
     )
-    _skip_errors = (anthropic.InternalServerError, )
+    _skip_errors = (anthropic.InternalServerError,)
 
     _repr = ["model_type", "model_backend", "multi_turn"]
 
diff --git a/utilization/model/huggingface_model.py b/utilization/model/huggingface_model.py
index 06dce774..9840893b 100644
--- a/utilization/model/huggingface_model.py
+++ b/utilization/model/huggingface_model.py
@@ -689,9 +689,7 @@ def _generation(
         if isinstance(batched_inputs[0], Conversation):
             # save the original conversation for chat-based model
             batched_conversations = batched_inputs
-            batched_inputs_nofilter = [
-                conv.to_model_prompt(max_turns=max_turns) for conv in batched_conversations
-            ]
+            batched_inputs_nofilter = [conv.to_model_prompt(max_turns=max_turns) for conv in batched_conversations]
 
             # deal with conversations with different number of turns
             batched_inputs = [i for i in batched_inputs_nofilter if i is not None]