templates.py
# Templates hold (question, answer, query) triplets
SEP = "\n##\n"  # separator between few-shot examples

TEMPLATES = {
    # Binary classification tasks
    "piqa": (
        "Goal: {goal}\n",
        "Answer: {}",
        "Answer:"
    ),
    "superglue-wic": (
        "{sentence1}\n{sentence2}\nquestion: Is the word '{word}' used in the same sense in the two sentences above?\n",
        "answer: {}",
        "answer:"
    ),
    "superglue-boolq": (
        "{passage}\nquestion: {question}?\n",
        "answer: {}",
        "answer:"
    ),
    "superglue-cb": (
        "{premise}\nquestion: {hypothesis}. true, false, or neither?\n",
        "answer: {}",
        "answer:"
    ),
    "superglue-wsc": (
        "Passage: {text}\nQuestion: In the passage above, does the pronoun '{span2_text}' refer to {span1_text}?\n",
        "Answer: {}",
        "Answer:"
    ),
    "superglue-rte": (
        "{premise}\nquestion: {hypothesis}. true or false?\n",
        "answer: {}",
        "answer:"
    ),
    # Multi-choice tasks
    "ai2_arc_easy": (
        "Question: {question}\n",
        "Answer: {}",
        "Answer:"
    ),
    "ai2_arc_challenge": (
        "Question: {question}\n",
        "Answer: {}",
        "Answer:"
    ),
    "hellaswag": (
        "Context: {context}\n",
        "Answer: {}",
        "Answer:"
    ),
    "openbookqa": (
        "Context: {context}\n",
        "Answer: {}",
        "Answer:"
    ),
}
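

# Illustrative sketch: build_piqa_prompt is a hypothetical helper showing how a
# (question, answer, query) triplet and SEP might be combined into a few-shot
# prompt for the "piqa" template. The repo's actual prompt-assembly logic
# presumably lives elsewhere; the demo field names follow apply_template below.
def build_piqa_prompt(demos, test_goal):
    """Join formatted demonstrations with SEP and end with the query prefix."""
    question_t, answer_t, query = TEMPLATES["piqa"]
    shots = [question_t.format(goal=d["input"]) + answer_t.format(d["output"])
             for d in demos]
    # e.g. "Goal: ...\nAnswer: ...\n##\nGoal: ...\nAnswer:"
    return SEP.join(shots + [question_t.format(goal=test_goal) + query])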


def apply_template(dp, dataset) -> dict:
    """Format dp["input"] (and, for binary tasks, dp["output"]) using TEMPLATES[dataset]."""
    # Binary
    if dataset == "superglue-wic":
        splits = dp["input"].split(" [SEP] ")
        dp["input"] = TEMPLATES[dataset][0].format(sentence1=splits[0], sentence2=splits[1], word=splits[2])
        dp["output"] = TEMPLATES[dataset][1].format(dp["output"])
    elif dataset == "superglue-boolq":
        splits = dp["input"].split(" [SEP] ")
        dp["input"] = TEMPLATES[dataset][0].format(passage=splits[0], question=splits[1])
        dp["output"] = TEMPLATES[dataset][1].format(dp["output"])
    elif dataset == "superglue-cb":
        splits = dp["input"].split(" [SEP] ")
        dp["input"] = TEMPLATES[dataset][0].format(premise=splits[0], hypothesis=splits[1])
        dp["output"] = TEMPLATES[dataset][1].format(dp["output"])
    elif dataset == "superglue-wsc":
        splits = dp["input"].split(" [SEP] ")
        dp["input"] = TEMPLATES[dataset][0].format(text=splits[0], span2_text=splits[2], span1_text=splits[1])
        dp["output"] = TEMPLATES[dataset][1].format(dp["output"])
    elif dataset == "superglue-rte":
        splits = dp["input"].split(" [SEP] ")
        dp["input"] = TEMPLATES[dataset][0].format(premise=splits[0], hypothesis=splits[1])
        dp["output"] = TEMPLATES[dataset][1].format(dp["output"])
    # Multi-choice: only the input is templated here
    elif dataset == "piqa":
        dp["input"] = TEMPLATES[dataset][0].format(goal=dp["input"])
    elif dataset == "ai2_arc_easy":
        dp["input"] = TEMPLATES[dataset][0].format(question=dp["input"])
    elif dataset == "ai2_arc_challenge":
        dp["input"] = TEMPLATES[dataset][0].format(question=dp["input"])
    elif dataset == "hellaswag":
        dp["input"] = TEMPLATES[dataset][0].format(context=dp["input"])
    elif dataset == "openbookqa":
        dp["input"] = TEMPLATES[dataset][0].format(context=dp["input"])
    else:
        raise NotImplementedError(dataset)
    return dp  # returned for convenience; dp is also modified in place
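

# Example: a hypothetical superglue-boolq data point run through apply_template.
# The " [SEP] "-joined input mirrors the split performed above; the field values
# are made up for illustration only.
if __name__ == "__main__":
    dp = {
        "input": "The sky appears blue because of Rayleigh scattering. [SEP] is the sky blue",
        "output": "yes",
    }
    dp = apply_template(dp, "superglue-boolq")
    print(dp["input"])   # "The sky appears blue because of Rayleigh scattering.\nquestion: is the sky blue?\n"
    print(dp["output"])  # "answer: yes"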