-
Notifications
You must be signed in to change notification settings - Fork 0
/
models.py
139 lines (110 loc) · 5.25 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# models.py
# classification models to use
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, AutoModelForSequenceClassification
from scipy.special import softmax
import numpy as np
def _matrix_softmax(matrix):
expd = torch.exp(matrix)
return expd / expd.sum(dim=(1, 2), keepdim=True)
class Model_Classification:
'''
Model based on Classifier directly trained on predicting if an answer exists. Default model is
https://huggingface.co/a-ware/roberta-large-squad-classification
'''
def __init__(self, device='cpu'):
self.device = torch.device(device)
self.network = AutoModelForSequenceClassification.from_pretrained("a-ware/roberta-large-squad-classification").to(self.device)
self.tokenizer = AutoTokenizer.from_pretrained("a-ware/roberta-large-squad-classification")
def predict(self, question, context):
'''
Predict class
Parameters
----------
question: (str or list(str))
The questions
context: (str or list(str))
The contexts used to answer the questions
'''
input_dict = self.tokenizer(question, context, padding=True, truncation=True, return_tensors='pt').to(self.device)
return self.network(**input_dict)[0].argmax(axis=1).cpu().numpy()
def predict_proba(self, question, context):
'''
Predict class probability (numpy output)
Parameters
----------
question: (str or list(str))
The questions
context: (str or list(str))
The contexts used to answer the questions
'''
input_dict = self.tokenizer(question, context, padding=True, truncation=True, return_tensors='pt').to(self.device)
logits = self.network(**input_dict)[0].detach().cpu().numpy()
return softmax(logits, axis=1)
def predict_proba_torch(self, input_data, input_dict):
'''
Predict class probabilities (torch output with gradient), only to be used in conjunction with interpretable embeddings layer from captum
Parameters
----------
input_data:
Input for torch model
'''
logits = self.network(input_data, attention_mask=input_dict['attention_mask'])[0]
return torch.nn.functional.softmax(logits)[:, 1]
class Model_QA:
'''
Model based on Question Answering model. Default model is
https://huggingface.co/twmkn9/albert-base-v2-squad2?text=What%27s+my+name%3F&context=My+name+is+Clara+and+I+live+in+Berkeley.
'''
def __init__(self, device='cpu'):
self.device = torch.device(device)
self.network = AutoModelForQuestionAnswering.from_pretrained("twmkn9/albert-base-v2-squad2").to(self.device)
self.tokenizer = AutoTokenizer.from_pretrained("twmkn9/albert-base-v2-squad2")
def predict(self, question, context):
'''
Predict class
Parameters
----------
question: (str or list(str))
The questions
context: (str or list(str))
The contexts used to answer the questions
'''
return (self.predict_proba(question, context)[:, 1] > 0.5).astype(int)
def predict_proba_torch(self, input_data, input_dict):
'''
Predict class probability (torch output with gradient), only to be used in conjunction with interpretable embeddings layer from captum
Parameters
----------
input_data:
Input for torch model
'''
start_scores, end_scores = self.network(input_data, attention_mask=input_dict['attention_mask'])
ps = _matrix_softmax(start_scores.unsqueeze(2).transpose(1, 2) + end_scores.unsqueeze(2))
ltri = torch.tril(ps)
for i, ids in enumerate(input_dict['token_type_ids']):
ltri[i, :, (torch.nonzero(ids.squeeze())[0][0]):] = 0
negative_answers = torch.triu(ps, diagonal=1).sum(dim=(1, 2)) # create the upper triangular matrix of invalid spans (end<start)
answer_in_question = ltri.sum(dim=(1, 2)) # extract all the answers starting in the question
P_answer = 1 - negative_answers - answer_in_question
return P_answer
def predict_proba(self, question, context):
'''
Predict class probability (numpy output)
Parameters
----------
question: (str or list(str))
The questions
context: (str or list(str))
The contexts used to answer the questions
'''
input_dict = self.tokenizer(question, context, padding=True, truncation=True, return_tensors='pt').to(self.device)
start_scores, end_scores = self.network(**input_dict)
ps = _matrix_softmax(start_scores.unsqueeze(2).transpose(1, 2) + end_scores.unsqueeze(2))
ltri = torch.tril(ps)
for i, ids in enumerate(input_dict['token_type_ids']):
ltri[i, :, (torch.nonzero(ids.squeeze())[0][0]):] = 0
negative_answers = torch.triu(ps, diagonal=1).sum(dim=(1, 2)) # create the upper triangular matrix of invalid spans (end<start)
answer_in_question = ltri.sum(dim=(1, 2)) # extract all the answers starting in the question
P_answer = (1 - negative_answers - answer_in_question).detach().cpu().numpy()
return np.vstack((1 - P_answer, P_answer)).T