From 7a3f4c8dfcbb872fea4c2cc90743ab91f04657b9 Mon Sep 17 00:00:00 2001
From: Edward Dixon
Date: Mon, 24 Apr 2017 16:50:02 +0100
Subject: [PATCH] Added a new method so we can save our transformed word vectors, rather than compute from scratch each time

---
 fasttext.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/fasttext.py b/fasttext.py
index 20a6e3f..a55b468 100644
--- a/fasttext.py
+++ b/fasttext.py
@@ -53,6 +53,28 @@ def apply_transform(self, transform):
         transmat = np.loadtxt(transform) if isinstance(transform, str) else transform
         self.embed = np.matmul(self.embed, transmat)
 
+    def export(self, outpath):
+        """Write the word vectors to a text file for later reuse.
+
+        Transforming a large matrix of word vectors is expensive, so this
+        method writes the current vectors back to a file instead of
+        recomputing them each time.
+
+        The first line is a header "<n_words> <n_dim>"; each following line
+        holds one word and its vector components formatted with "%.6f",
+        all separated by single spaces.
+
+        :param outpath: The path to the output file to be written
+        """
+        # The with-block guarantees the file is flushed and closed, even if
+        # a write fails part-way through.
+        with open(outpath, "w") as fout:
+            # Header takes the guesswork out of loading by recording how many lines, vector dims
+            fout.write(str(self.n_words) + " " + str(self.n_dim) + "\n")
+            for word in self.word2id:
+                vector_components = ["%.6f" % number for number in self[word]]
+                fout.write(word + " " + " ".join(vector_components) + "\n")
+
     @classmethod
     def cosine_similarity(cls, vec_a, vec_b):
         """Compute cosine similarity between vec_a and vec_b"""