cemoody · maximus12793 · Oct 17, 2017 · Oct 17, 2017
diff --git a/lda2vec/corpus.py b/lda2vec/corpus.py
@@ -1,6 +1,5 @@
 from collections import defaultdict
 import numpy as np
-import difflib
 import pandas as pd
 
 try:
@@ -531,8 +530,10 @@ def compact_word_vectors(self, vocab, filename=None, array=None,
         True
         """
         n_words = len(self.compact_to_loose)
-        from gensim.models.word2vec import Word2Vec
-        model = Word2Vec.load_word2vec_format(filename, binary=True)
+        import gensim
+        model = gensim.models.KeyedVectors.load_word2vec_format(
+            filename, binary=True)
+
         n_dim = model.syn0.shape[1]
         data = np.random.normal(size=(n_words, n_dim)).astype('float32')
         data -= data.mean()
@@ -548,9 +549,12 @@ def compact_word_vectors(self, vocab, filename=None, array=None,
         choices = np.array(keys, dtype='S')
         lengths = np.array(lens, dtype='int32')
         s, f = 0, 0
-        rep0 = lambda w: w
-        rep1 = lambda w: w.replace(' ', '_')
-        rep2 = lambda w: w.title().replace(' ', '_')
+
+        def rep0(w): return w
+
+        def rep1(w): return w.replace(' ', '_')
+
+        def rep2(w): return w.title().replace(' ', '_')
         reps = [rep0, rep1, rep2]
         for compact in np.arange(top):
             loose = self.compact_to_loose.get(compact, None)
@@ -574,7 +578,6 @@ def compact_word_vectors(self, vocab, filename=None, array=None,
                     sel = choices[idx]
                     d = damerau_levenshtein_distance_withNPArray(word, sel)
                     choice = np.array(keys_raw)[idx][np.argmin(d)]
-                    # choice = difflib.get_close_matches(word, choices)[0]
                     vector = model[choice]
                     print compact, word, ' --> ', choice
                 except IndexError:
@@ -677,8 +680,9 @@ def compact_to_coocurrence(self, word_compact, indices, window_size=10):
         for name, index in indices.items():
             tokens[name] = index
         a, b = tokens.copy(), tokens.copy()
-        mask = lambda x: np.prod([x[k + '_x'] == x[k + '_y']
-                                  for k in indices.keys()], axis=0)
+
+        def mask(x): return np.prod([x[k + '_x'] == x[k + '_y']
+                                     for k in indices.keys()], axis=0)
         group_keys = ['word_index_x', 'word_index_y', ]
         group_keys += [k + '_x' for k in indices.keys()]
         total = []