Added a new method so we can save our transformed word vectors, rather than compute from scratch each time
babylonhealth · Apr 24, 2017 · 7a3f4c8 · 7a3f4c8
1 parent d591fbe
commit 7a3f4c8
Showing 1 changed file with 18 additions and 0 deletions.
diff --git a/fasttext.py b/fasttext.py
@@ -53,6 +53,24 @@ def apply_transform(self, transform):
         transmat = np.loadtxt(transform) if isinstance(transform, str) else transform
         self.embed = np.matmul(self.embed, transmat)
 
+    def export(self, outpath):
+        """
+        Write the current (possibly transformed) word vectors to a text file.
+
+        Transforming a large matrix of word vectors is expensive, so this lets the
+        result be saved for future use instead of being recomputed from scratch.
+        :param outpath: path of the output file; opened in "w" mode, so it is overwritten
+        """
+        # The with-block closes (and flushes) the file even if a write raises.
+        with open(outpath, "w") as fout:
+            # Header takes the guesswork out of loading by recording how many lines, vector dims
+            fout.write(str(self.n_words) + " " + str(self.n_dim) + "\n")
+            for word in self.word2id:
+                # One line per word: the word, then its components to 6 decimal places
+                components = " ".join("%.6f" % number for number in self[word])
+                fout.write(word + " " + components + "\n")
+
+
     @classmethod
     def cosine_similarity(cls, vec_a, vec_b):
         """Compute cosine similarity between vec_a and vec_b"""