#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import sys

# Make the bundled `ferreira` codebase importable as `model.*`.
sys.path.append('ferreira')
from model.utils import get_dataset, split_data, RunCV, run_test
from model.classifiers.lr_predictors import LogitPredictor
# Baseline lexical transforms from the original codebase (only a subset is
# used below; the full set is imported for easy experimentation).
from model.baseline.transforms import (
    RefutingWordsTransform,
    QuestionMarkTransform,
    HedgingWordsTransform,
    InteractionTransform,
    NegationOfRefutingWordsTransform,
    BoWTransform,
    PolarityTransform,
    BrownClusterPairTransform
)
# Extended transforms built on external resources (PPDB, word2vec, parses).
from model.ext.transforms import (
    AlignedPPDBSemanticTransform,
    NegationAlignmentTransform,
    Word2VecSimilaritySemanticTransform,
    DependencyRootDepthTransform,
    SVOTransform
)
# Named registry of feature transforms; each value is a zero-argument
# factory, either the transform class itself or a lambda producing an
# instance.
transforms = {
    'BoW': lambda: BoWTransform(),
    'Q': QuestionMarkTransform,
    'W2V': Word2VecSimilaritySemanticTransform,
    'PPDB': AlignedPPDBSemanticTransform,
    'NegAlgn': NegationAlignmentTransform,
    'RootDep': DependencyRootDepthTransform,
    'SVO': SVOTransform,
}
# Ordered list actually fed to the predictor; the trailing comments give each
# transform's feature width (518 columns in total).
list_transforms = [lambda: BoWTransform(),               # 500
                   QuestionMarkTransform,                # 1
                   Word2VecSimilaritySemanticTransform,  # 1
                   AlignedPPDBSemanticTransform,         # 1
                   NegationAlignmentTransform,           # 1
                   DependencyRootDepthTransform,         # 2
                   SVOTransform]                         # 12

predictor = LogitPredictor
#p = predictor(transforms.values())
p = predictor(list_transforms)
def get_data():
    """Return (train, validation, test) raw rows and feature matrices."""
    train_data = get_dataset('url-versions-2015-06-14-clean-train.csv')
    X, y = split_data(train_data)
    X = p.pipeline.fit_transform(X)
    # Hold out the rows after index 1489 as a validation split.
    train_data1 = train_data[:1489]
    train_data2 = train_data[1489:]
    X1 = X[:1489]
    X2 = X[1489:]
    test_data = get_dataset('url-versions-2015-06-14-clean-test.csv')
    X_test, y_test = split_data(test_data)
    X_test = p.pipeline.transform(X_test)
    # train / validation / test
    return (train_data1, X1, train_data2, X2, test_data, X_test)
def get_snopes():
    """Featurise the hand-collected Snopes claims CSV. Assumes the pipeline
    has already been fitted, e.g. via a prior call to get_data()."""
    test_data = get_dataset("my_claims_csv_cleaned.csv")
    X_test, y_test = split_data(test_data)
    X_test = p.pipeline.transform(X_test)
    return (test_data, X_test)
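
# --- Usage sketch (illustrative; not part of the original script) ---
# LogitPredictor presumably wraps its own estimator, so as a stand-in this
# trains a plain scikit-learn LogisticRegression on the transformed matrices,
# assuming they are array-like inputs that sklearn accepts as-is.
if __name__ == '__main__':
    from sklearn.linear_model import LogisticRegression

    train_data1, X1, train_data2, X2, test_data, X_test = get_data()
    # Recover labels for each split from the raw rows.
    _, y1 = split_data(train_data1)
    _, y2 = split_data(train_data2)
    _, y_test = split_data(test_data)

    clf = LogisticRegression()
    clf.fit(X1, y1)
    print 'validation accuracy: %.3f' % clf.score(X2, y2)
    print 'test accuracy:       %.3f' % clf.score(X_test, y_test)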