diff --git a/trainingModel.ipynb b/trainingModel.ipynb index 76eeb1c..20273ad 100644 --- a/trainingModel.ipynb +++ b/trainingModel.ipynb @@ -1180,34 +1180,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "# def word_averaging(wv, words):\n", - "# \"\"\"Averages the word vectors for a list of words.\n", - "\n", - "# Args:\n", - "# words: A list of words.\n", - "# wv: A pre-trained Gensim Word2Vec model.\n", - "\n", - "# Returns:\n", - "# A numpy array containing the average word vector.\n", - "# \"\"\"\n", - "# all_words, mean = set(), []\n", - " \n", - "# for word in words:\n", - "# if isinstance(word, np.ndarray):\n", - "# mean.append(word)\n", - "# elif word in wv.key_to_index:\n", - "# mean.append(wv.vectors_norm[wv.key_to_index[word]])\n", - "# all_words.add(wv.vectors_norm[word])\n", - "\n", - "# if not mean:\n", - "# logging.warning(\"cannot compute similarity with no input %s\", words)\n", - "# # FIXME: remove these examples in pre-processing\n", - "# return np.zeros(wv.vector_size,)\n", - "\n", - "# mean = gensim.matutils.unitvec(np.array(mean).mean(axis=0)).astype(np.float32)\n", - "# return mean\n", - "\n", "\n", "def average_word_vectors(model, words):\n", " \"\"\"Averages the word vectors for a list of words.\n",