fix styles

huyiwen · May 24, 2024 · 29e986b
1 parent 50bbbd5
commit 29e986b
Show file tree

Hide file tree

Showing 10 changed files with 46 additions and 31 deletions.
diff --git a/.github/workflows/isort-check.yml b/.github/workflows/isort-check.yml
@@ -11,3 +11,4 @@ jobs:
         with:
             sort-paths: "utilization"
             requirements-files: "requirements.txt requirements-openai.txt"
+            configuration: "--check-only --diff --settings-path .isort.cfg"
diff --git a/.isort.cfg b/.isort.cfg
@@ -1,2 +1,3 @@
 [settings]
 line_length=120
+multi_line_output=5
diff --git a/tests/test_empty.py b/tests/test_empty.py
@@ -0,0 +1,2 @@
+def test_empty():
+    assert True
diff --git a/utilization/dataset/dataset_enum.py b/utilization/dataset/dataset_enum.py
@@ -1,6 +1,10 @@
 WMT_DATASETS = ["wmt10", "wmt13", "wmt14", "wmt15", "wmt16", "wmt17", "wmt18", "wmt19", "wmt21"]
 
-DEFAULT_VLLM_DATASETS = {"alpaca_eval", "bbh", "cnn_dailymail", "color_objects", "coqa", "drop", "gaokao", "gsm8k", "halueval", "humaneval", "ifeval", "lambada", "math", "mbpp", "mt_bench", "nq", "quac", "real_toxicity_prompts", "squad", "squad_v2", "tldr", "triviaqa", "vicuna_bench", "webq", "xsum"} | set(WMT_DATASETS)
+DEFAULT_VLLM_DATASETS = {
+    "alpaca_eval", "bbh", "cnn_dailymail", "color_objects", "coqa", "drop", "gaokao", "gsm8k", "halueval", "humaneval",
+    "ifeval", "lambada", "math", "mbpp", "mt_bench", "nq", "quac", "real_toxicity_prompts", "squad", "squad_v2", "tldr",
+    "triviaqa", "vicuna_bench", "webq", "xsum"
+} | set(WMT_DATASETS)
 
 DATASET_ALIASES = {
     "agieval": ["agieval_single_choice", "agieval_cot"],  # try to use MultipleChoiceDataset first

diff --git a/utilization/dataset/gpqa.py b/utilization/dataset/gpqa.py
@@ -1,9 +1,8 @@
-from functools import cached_property
 import json
+from functools import cached_property
 from logging import getLogger
-from typing import List, Optional, Tuple, Union
 from pathlib import Path
-
+from typing import List, Optional, Tuple, Union
 
 from .multiple_choice_dataset import MultipleChoiceDataset
 
@@ -12,14 +11,14 @@
 FIVE_SHOT_INSTRUCTION = ""
 
 INSTRUCTIONS = {
-    "five_shot": """{{ 'Here are some example questions from experts. Answer the final question yourself, following the format of the previous questions exactly.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('When you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)".') }}""",
-
-    "zero_shot": """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\n\nFormat your response as follows: "The correct answer is (insert answer here)".""",
-
-    "five_shot_cot": """{{ 'Here are some example questions from experts. An explanation is given before the final answer. Answer the final question yourself, giving your reasoning beforehand.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('Give step by step reasoning before you answer, and when you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)":\n') }}""",
-
-    "zero_shot_cot": """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\nLet's think step by step: __SEPARATOR__\n\nBased on the above, what is the single, most likely answer choice? Answer in the format "The correct answer is (insert answer here)".""",
-
+    "five_shot":
+    """{{ 'Here are some example questions from experts. Answer the final question yourself, following the format of the previous questions exactly.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('When you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)".') }}""",
+    "zero_shot":
+    """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\n\nFormat your response as follows: "The correct answer is (insert answer here)".""",
+    "five_shot_cot":
+    """{{ 'Here are some example questions from experts. An explanation is given before the final answer. Answer the final question yourself, giving your reasoning beforehand.\n' if example_idx == 0 }}Question: {{ question }}{{ '\nChoices:\n' + options if options }}\n{{ 'The correct answer is' if example_idx >= 0 else ('Give step by step reasoning before you answer, and when you' + "'" + 're ready to answer, please use the format "The correct answer is (insert answer here)":\n') }}""",
+    "zero_shot_cot":
+    """What is the correct answer to this question: {{ question }}{{ '\n\nChoices:\n' + options if options }}\nLet's think step by step: __SEPARATOR__\n\nBased on the above, what is the single, most likely answer choice? Answer in the format "The correct answer is (insert answer here)".""",
     "retrieval": "{{ bing }}\n\nQuestion: {{ question }}{{ '\n\nChoices:\n' + options if options }}"
 }
 
@@ -38,7 +37,9 @@ class GPQA(MultipleChoiceDataset):
     def init_arguments(self):
 
         if self.sample_num > 1 and (self.max_num_shots == 0 or self.cot is None):
-            logger.warning("Self-consistency is only available for 5-shot CoT. Automatically set max_num_shots to 5 and cot to base.")
+            logger.warning(
+                "Self-consistency is only available for 5-shot CoT. Automatically set max_num_shots to 5 and cot to base."
+            )
             self.max_num_shots = 5
             self.cot = "base"
 
@@ -85,7 +86,9 @@ def read_file(base_path, search_path):
         else:
             return open(examples_json, "r").read()
 
-    def load_raw_dataset(self, dataset_path: Optional[str], subset_name: Optional[str], evaluation_set: str, example_set: Optional[str]):
+    def load_raw_dataset(
+        self, dataset_path: Optional[str], subset_name: Optional[str], evaluation_set: str, example_set: Optional[str]
+    ):
 
         super().load_raw_dataset(dataset_path, subset_name, evaluation_set, None)
 
@@ -98,7 +101,9 @@ def load_raw_dataset(self, dataset_path: Optional[str], subset_name: Optional[st
                 SEARCH_PATH = ["prompts", "chain_of_thought_examples.json"]
                 data = self.read_file(example_set_path, SEARCH_PATH)
                 if data is None:
-                    raise FileNotFoundError("Example set not found. Please provide the correct path to the example set through `--example_set path/to/prompts`.")
+                    raise FileNotFoundError(
+                        "Example set not found. Please provide the correct path to the example set through `--example_set path/to/prompts`."
+                    )
                 self.example_data = json.loads(data)["questions"]
 
             elif self.max_num_shots == 0 and self.cot in ["retrieval", "retrieval_content"]:
@@ -108,7 +113,9 @@ def load_raw_dataset(self, dataset_path: Optional[str], subset_name: Optional[st
                     SEARCH_PATH = ["prompts", "bing-search-cot-content.txt"]
                 data = self.read_file(example_set_path, SEARCH_PATH)
                 if data is None:
-                    raise FileNotFoundError("Chain-of-thought propmts for open-book not found. Please provide the correct path to the example set through `--example_set path/to/prompts`.")
+                    raise FileNotFoundError(
+                        "Chain-of-thought propmts for open-book not found. Please provide the correct path to the example set through `--example_set path/to/prompts`."
+                    )
                 self.bing_data = data
 
     def format_instance(self, instance):
@@ -120,7 +127,10 @@ def format_instance(self, instance):
         else:
             # https://huggingface.co/datasets/Idavidrein/gpqa
             instance["question"] = instance["Question"]
-            instance["options"] = [instance["Incorrect Answer 1"], instance["Incorrect Answer 2"], instance["Incorrect Answer 3"], instance["Correct Answer"]]
+            instance["options"] = [
+                instance["Incorrect Answer 1"], instance["Incorrect Answer 2"], instance["Incorrect Answer 3"],
+                instance["Correct Answer"]
+            ]
             order = self.shuffle_options(instance["options"])
             instance["target_idx"] = order.index(3)
         return instance

diff --git a/utilization/dataset/multiple_choice_dataset.py b/utilization/dataset/multiple_choice_dataset.py
@@ -97,7 +97,9 @@ def post_processing(self, predictions: Union[List[Tuple[float, int]], List[List[
                 for m in self.metrics:
                     if isinstance(m, Accuracy):
                         m.tag = "Norm"
-                normalized_predictions = np.array([rc[0] - ra[0] for rc, ra in zip(predictions[::2], predictions[1::2])])
+                normalized_predictions = np.array([
+                    rc[0] - ra[0] for rc, ra in zip(predictions[::2], predictions[1::2])
+                ])
             else:
                 normalized_predictions = np.array([
                     result / length if length > 0 else LARGE_POSITIVE for result, length in predictions

diff --git a/utilization/dataset/tydiqa.py b/utilization/dataset/tydiqa.py
@@ -1,7 +1,7 @@
+import re
 from functools import cached_property
 
-import re
-from ..metric import F1, Em
+from ..metric import F1
 from .generation_dataset import GenerationDataset
 
 

diff --git a/utilization/dataset/utils.py b/utilization/dataset/utils.py
@@ -26,7 +26,9 @@ class DatasetUtilMixin:
 
     answer_prompt: str = "Answer:"
 
-    def set_tokenizer(self, tokenizer: Union[tiktoken.Encoding, PreTrainedTokenizer, PreTrainedTokenizerFast, None]) -> None:
+    def set_tokenizer(
+        self, tokenizer: Union[tiktoken.Encoding, PreTrainedTokenizer, PreTrainedTokenizerFast, None]
+    ) -> None:
         self.tokenizer = tokenizer
         if isinstance(tokenizer, tiktoken.Encoding):
             # Encoding.encode_ordinary is slightly faster than Encoding.encode
@@ -234,12 +236,7 @@ def load_fn(split):
 
                 def load_fn(split):
                     return ds.load_dataset(
-                        _path,
-                        subset_name,
-                        split=split,
-                        cache_dir=dataset_path,
-                        trust_remote_code=True,
-                        **load_kwargs
+                        _path, subset_name, split=split, cache_dir=dataset_path, trust_remote_code=True, **load_kwargs
                     )
             elif offline:
 

diff --git a/utilization/model/anthropic_model.py b/utilization/model/anthropic_model.py
@@ -26,7 +26,7 @@ class Anthropic(ApiModel):
         anthropic.APIConnectionError, anthropic.AuthenticationError, anthropic.BadRequestError, anthropic.ConflictError,
         anthropic.NotFoundError, anthropic.PermissionDeniedError, anthropic.UnprocessableEntityError
     )
-    _skip_errors = (anthropic.InternalServerError, )
+    _skip_errors = (anthropic.InternalServerError,)
 
     _repr = ["model_type", "model_backend", "multi_turn"]
 

diff --git a/utilization/model/huggingface_model.py b/utilization/model/huggingface_model.py
@@ -689,9 +689,7 @@ def _generation(
         if isinstance(batched_inputs[0], Conversation):
             # save the original conversation for chat-based model
             batched_conversations = batched_inputs
-            batched_inputs_nofilter = [
-                conv.to_model_prompt(max_turns=max_turns) for conv in batched_conversations
-            ]
+            batched_inputs_nofilter = [conv.to_model_prompt(max_turns=max_turns) for conv in batched_conversations]
             # deal with conversations with different number of turns
             batched_inputs = [i for i in batched_inputs_nofilter if i is not None]