Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simple deprecation fix #60

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions lda2vec/corpus.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from collections import defaultdict
import numpy as np
import difflib
import pandas as pd

try:
Expand Down Expand Up @@ -531,8 +530,10 @@ def compact_word_vectors(self, vocab, filename=None, array=None,
True
"""
n_words = len(self.compact_to_loose)
from gensim.models.word2vec import Word2Vec
model = Word2Vec.load_word2vec_format(filename, binary=True)
import gensim
model = gensim.models.KeyedVectors.load_word2vec_format(
filename, binary=True)

n_dim = model.syn0.shape[1]
data = np.random.normal(size=(n_words, n_dim)).astype('float32')
data -= data.mean()
Expand All @@ -548,9 +549,12 @@ def compact_word_vectors(self, vocab, filename=None, array=None,
choices = np.array(keys, dtype='S')
lengths = np.array(lens, dtype='int32')
s, f = 0, 0
rep0 = lambda w: w
rep1 = lambda w: w.replace(' ', '_')
rep2 = lambda w: w.title().replace(' ', '_')

def rep0(w): return w

def rep1(w): return w.replace(' ', '_')

def rep2(w): return w.title().replace(' ', '_')
reps = [rep0, rep1, rep2]
for compact in np.arange(top):
loose = self.compact_to_loose.get(compact, None)
Expand All @@ -574,7 +578,6 @@ def compact_word_vectors(self, vocab, filename=None, array=None,
sel = choices[idx]
d = damerau_levenshtein_distance_withNPArray(word, sel)
choice = np.array(keys_raw)[idx][np.argmin(d)]
# choice = difflib.get_close_matches(word, choices)[0]
vector = model[choice]
print compact, word, ' --> ', choice
except IndexError:
Expand Down Expand Up @@ -677,8 +680,9 @@ def compact_to_coocurrence(self, word_compact, indices, window_size=10):
for name, index in indices.items():
tokens[name] = index
a, b = tokens.copy(), tokens.copy()
mask = lambda x: np.prod([x[k + '_x'] == x[k + '_y']
for k in indices.keys()], axis=0)

def mask(x): return np.prod([x[k + '_x'] == x[k + '_y']
for k in indices.keys()], axis=0)
group_keys = ['word_index_x', 'word_index_y', ]
group_keys += [k + '_x' for k in indices.keys()]
total = []
Expand Down