-
Notifications
You must be signed in to change notification settings - Fork 19
/
utils.cc
110 lines (98 loc) · 3.17 KB
/
utils.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include "utils.h"
using namespace std;
using namespace Eigen;
/* Cuts `line` at every occurrence of `delim` and returns the pieces in
 * order. Empty pieces (produced by leading, trailing or consecutive
 * delimiters) are dropped, so an empty or all-delimiter line yields an
 * empty vector.
 * TODO: consider splitting on any whitespace rather than a single
 * delimiter character. */
vector<string> split_line(const string& line, char delim) {
  vector<string> tokens;
  string::size_type start = 0;
  while (start < line.size()) {
    // End of the current piece: the next delimiter, or the end of the line.
    string::size_type stop = line.find(delim, start);
    if (stop == string::npos)
      stop = line.size();
    if (stop > start)
      tokens.push_back(line.substr(start, stop - start));
    start = stop + 1;
  }
  return tokens;
}
void ReadVecsFromFile(const string& vec_file_name, mapStrUnsigned* t_vocab,
vector<Col>* word_vecs) {
ifstream vec_file(vec_file_name.c_str());
mapStrUnsigned& vocab = *t_vocab;
unsigned vocab_size = 0;
if (vec_file.is_open()) {
string line;
vocab.clear();
while (getline(vec_file, line)) {
vector<string> vector_stuff = split_line(line, ' ');
string word = vector_stuff[0];
Col word_vec = Col::Zero(vector_stuff.size()-1);
for (unsigned i = 0; i < word_vec.size(); ++i)
word_vec(i, 0) = stof(vector_stuff[i+1]);
vocab[word] = vocab_size++;
word_vecs->push_back(word_vec);
}
cerr << "Read: " << vec_file_name << endl;
cerr << "Vocab length: " << word_vecs->size() << endl;
cerr << "Vector length: " << (*word_vecs)[0].size() << endl << endl;
vec_file.close();
assert (word_vecs->size() == vocab.size());
} else {
cerr << "Could not open " << vec_file_name << endl;
exit(0);
}
}
void ReadVecsFromFile(const string& vec_file_name, mapUnsignedStr* t_vocab,
vector<Col>* word_vecs) {
ifstream vec_file(vec_file_name.c_str());
mapUnsignedStr& vocab = *t_vocab;
unsigned vocab_size = 0;
if (vec_file.is_open()) {
string line;
vocab.clear();
while (getline(vec_file, line)) {
vector<string> vector_stuff = split_line(line, ' ');
string word = vector_stuff[0];
Col word_vec = Col::Zero(vector_stuff.size()-1);
for (unsigned i = 0; i < word_vec.size(); ++i)
word_vec(i, 0) = stof(vector_stuff[i+1]);
vocab[vocab_size++] = word;
word_vecs->push_back(word_vec);
}
cerr << "Read: " << vec_file_name << endl;
cerr << "Vocab length: " << word_vecs->size() << endl;
cerr << "Vector length: " << (*word_vecs)[0].size() << endl << endl;
vec_file.close();
assert (word_vecs->size() == vocab.size());
} else {
cerr << "Could not open " << vec_file_name;
exit(0);
}
}
/* Replaces every entry of the column vector *v with its hyperbolic tangent,
 * in place. The index is a signed int, like the other element-wise helpers
 * in this file, so it is not compared as unsigned against the signed row
 * count. */
void ElemwiseTanh(Col* v) {
  for (int i = 0; i < v->rows(); ++i)
    (*v)(i, 0) = tanh((*v)(i, 0));
}
/* v is the vector after taking tanh() */
void ElemwiseTanhGrad(const Col &v, Col* g) {
for (int i = 0; i < v.rows(); ++i)
(*g)(i, 0) = 1 - pow(v(i, 0), 2);
}
/* Applies, in place, an odd nonlinearity to every entry of *v.
 *
 * For an entry x the result y is the solution of y^3/3 + y = x, found with
 * 12 Newton steps  y <- (2*y^3/3 + a) / (y^2 + 1)  started at y = a, where
 * a = |x|. The sign of x is put back afterwards, so f(-x) = -f(x). Entries
 * equal to zero are left untouched.
 *
 * The target of the iteration must be the magnitude |x|, not the signed x.
 * With the signed value a negative input converges to the negative root,
 * and the final sign flip then turns it positive.
 */
void ElemwiseAndrewNsnl(Col *v) {
  for (int i = 0; i < v->rows(); ++i) {
    const double x = (*v)(i, 0);
    if (x == 0)
      continue;  // f(0) = 0, nothing to do
    const bool negative = (x < 0);
    const double magnitude = negative ? -x : x;
    double y_n = magnitude;
    // Distinct counter name: the original reused `i` and shadowed the row index.
    for (unsigned step = 0; step < 12; ++step) {
      const double sq = y_n * y_n;
      y_n = (2 * sq * y_n / 3 + magnitude) / (sq + 1);
    }
    (*v)(i, 0) = negative ? -y_n : y_n;
  }
}
void ElemwiseAndrewNsnlGrad(const Col &v, Col* g) {
for (int i = 0; i < v.rows(); ++i)
(*g)(i, 0) = 1 / (1 + pow(v(i, 0), 2));
}
/* Cosine similarity of two column vectors: their dot product divided by the
 * square root of the product of their squared norms. No guard is applied
 * for zero-norm inputs, so the division is by zero in that case. */
double CosineSim(const Col& ci, const Col& cj) {
  const auto norm_product = sqrt(ci.squaredNorm() * cj.squaredNorm());
  const auto inner = ci.dot(cj);
  return inner / norm_product;
}