Skip to content

Commit

Permalink
remove
Browse files Browse the repository at this point in the history
  • Loading branch information
bmosaicml committed Oct 25, 2023
1 parent 27754e8 commit b00a806
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 118 deletions.
117 changes: 0 additions & 117 deletions scripts/eval/yamls/eval_gauntlet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,6 @@ eval_gauntlet:
- reading_comprehension
- safety
- programming
lm_task_average:
- world_knowledge_lm_task_subscore
- commonsense_reasoning_lm_task_subscore
- language_understanding_lm_task_subscore
- symbolic_problem_solving_lm_task_subscore
- reading_comprehension_lm_task_subscore
lite_average:
- world_knowledge_lite
- commonsense_reasoning_lite
- language_understanding_lite
- symbolic_problem_solving_lite
- reading_comprehension_lite
- programming_lite
categories:
- name: world_knowledge
benchmarks:
Expand Down Expand Up @@ -213,107 +200,3 @@ eval_gauntlet:
- name: human_eval_execution_prediction
num_fewshot: 3
random_baseline: 0.0
- name: world_knowledge_lm_task_subscore
benchmarks:
- name: jeopardy
num_fewshot: 10
random_baseline: 0
- name: bigbench_qa_wikidata
num_fewshot: 10
random_baseline: 0
- name: language_understanding_lm_task_subscore
benchmarks:
- name: lambada_openai
num_fewshot: 0
random_baseline: 0.0
- name: bigbench_conlang_translation
num_fewshot: 0
random_baseline: 0.0
- name: symbolic_problem_solving_lm_task_subscore
benchmarks:
- name: bigbench_dyck_languages
num_fewshot: 10
random_baseline: 0
- name: bigbench_cs_algorithms
num_fewshot: 10
random_baseline: 0
- name: bigbench_operators
num_fewshot: 10
random_baseline: 0.0
- name: bigbench_repeat_copy_logic
num_fewshot: 10
random_baseline: 0.0
- name: simple_arithmetic_withspaces
num_fewshot: 10
random_baseline: 0.0
- name: simple_arithmetic_nospaces
num_fewshot: 10
random_baseline: 0.0
- name: reading_comprehension_lm_task_subscore
benchmarks:
- name: pubmed_qa_labeled
num_fewshot: 10
random_baseline: 0.0
- name: squad
num_fewshot: 10
random_baseline: 0
- name: world_knowledge_lite
benchmarks:
- name: jeopardy
num_fewshot: 10
random_baseline: 0
- name: arc_challenge
num_fewshot: 10
random_baseline: 0.25
- name: commonsense_reasoning_lite
benchmarks:
- name: copa
num_fewshot: 0
random_baseline: 0.5
- name: piqa
num_fewshot: 10
random_baseline: 0.5
- name: language_understanding_lite
benchmarks:
- name: lambada_openai
num_fewshot: 0
random_baseline: 0.0
- name: hellaswag
num_fewshot: 10
random_baseline: 0.25
- name: winograd
num_fewshot: 0
random_baseline: 0.5
- name: symbolic_problem_solving_lite
benchmarks:
- name: bigbench_elementary_math_qa
num_fewshot: 10
random_baseline: 0.25
- name: bigbench_dyck_languages
num_fewshot: 10
random_baseline: 0
- name: bigbench_operators
num_fewshot: 10
random_baseline: 0.0
- name: bigbench_repeat_copy_logic
num_fewshot: 10
random_baseline: 0.0
- name: simple_arithmetic_withspaces
num_fewshot: 10
random_baseline: 0.0
- name: simple_arithmetic_nospaces
num_fewshot: 10
random_baseline: 0.0
- name: reading_comprehension_lite
benchmarks:
- name: pubmed_qa_labeled
num_fewshot: 10
random_baseline: 0.0
- name: squad
num_fewshot: 10
random_baseline: 0
- name: programming_lite
benchmarks:
- name: human_eval
num_fewshot: 0
random_baseline: 0.0
2 changes: 1 addition & 1 deletion scripts/eval/yamls/tasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ icl_tasks:
icl_task_type: multiple_choice
-
label: winogender_mc_male
-dataset_uri: eval/local_data/safety/winogender_mc_female.jsonl # ADD YOUR OWN DATASET URI
+dataset_uri: eval/local_data/safety/winogender_mc_male.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [10]
icl_task_type: multiple_choice
-
Expand Down

0 comments on commit b00a806

Please sign in to comment.