-
Notifications
You must be signed in to change notification settings - Fork 2
/
emoji_similarlity.py
38 lines (25 loc) · 1.5 KB
/
emoji_similarlity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity,euclidean_distances,pairwise_distances
class Emoji_similarlity():
    """Rank rows of the translated-name dataset by similarity to a query string.

    On construction the CSV at ``./Dataset/Trans_Name.csv`` is loaded and its
    ``Tran_Name`` column is vectorised with a character n-gram TF-IDF model
    (``char_wb`` analyzer, 1- to 4-grams).  Every ``*_ver`` method vectorises
    the query with that fitted model, scores each dataset row against it and
    returns the best rows, with the score appended as a column named after
    the method (``cosine_ver``, ``euclidean_ver``, ``jaccard_ver``,
    ``pearson_ver``).
    """

    def __init__(self):
        """Load the dataset and fit the TF-IDF model on its ``Tran_Name`` column."""
        self.df = pd.read_csv("./Dataset/Trans_Name.csv")
        self.tfidfvect = TfidfVectorizer(analyzer="char_wb", ngram_range=(1, 4))
        self.dtm = self.tfidfvect.fit_transform(self.df["Tran_Name"])

    def _top_matches(self, scores, column, top_n=10, largest=True):
        """Attach *scores* to the dataset as *column* and return the best rows.

        Args:
            scores: One score per dataset row; any array-like that flattens
                to length ``len(self.df)`` (an ``(n, 1)`` matrix is accepted).
            column: Name given to the score column in the result.
            top_n: Maximum number of rows to return.
            largest: ``True`` to keep the highest scores (similarities),
                ``False`` to keep the lowest (distances).

        Returns:
            A new DataFrame holding the selected rows, best match first.
        """
        # Assign by position so the result does not depend on self.df's index.
        ranked = self.df.assign(**{column: np.asarray(scores).ravel()})
        if largest:
            return ranked.nlargest(top_n, column)
        return ranked.nsmallest(top_n, column)

    def cosine_ver(self, target, top_n=10):
        """Return the *top_n* rows with the highest cosine similarity to *target*.

        The score is stored in the ``cosine_ver`` column.
        """
        target_dtm = self.tfidfvect.transform([target])
        scores = cosine_similarity(self.dtm, target_dtm)
        return self._top_matches(scores, "cosine_ver", top_n)

    def euclidean_ver(self, target, top_n=10):
        """Return the *top_n* rows with the smallest Euclidean distance to *target*.

        The distance is stored in the ``euclidean_ver`` column; because it is
        a distance, smaller is better and rows are sorted ascending.
        """
        target_dtm = self.tfidfvect.transform([target])
        distances = euclidean_distances(self.dtm, target_dtm)
        return self._top_matches(distances, "euclidean_ver", top_n, largest=False)

    def jaccard_ver(self, target, top_n=10):
        """Return the *top_n* rows with the highest Jaccard similarity to *target*.

        Jaccard is computed on the *presence* of each n-gram feature (TF-IDF
        weights are reduced to non-zero / zero).  The similarity
        ``1 - jaccard_distance`` is stored in the ``jaccard_ver`` column.
        """
        target_dtm = self.tfidfvect.transform([target])
        # Boolean inputs: the Jaccard metric is defined on sets, and passing
        # booleans avoids an implicit float -> bool conversion in scikit-learn.
        corpus_present = self.dtm.toarray() != 0
        target_present = target_dtm.toarray() != 0
        distances = pairwise_distances(corpus_present, target_present, metric="jaccard")
        return self._top_matches(1 - distances, "jaccard_ver", top_n)

    def pearson_ver(self, target, top_n=10):
        """Return the *top_n* rows with the highest Pearson correlation to *target*.

        The correlation between each row's TF-IDF vector and the query's
        vector is stored in the ``pearson_ver`` column.  Where the correlation
        is undefined (a constant vector on either side, e.g. a query sharing
        no n-gram with the fitted vocabulary) the score is set to ``0.0``.
        """
        target_dtm = self.tfidfvect.transform([target])
        corpus = np.asarray(self.dtm.toarray(), dtype=float)
        query = np.asarray(target_dtm.toarray(), dtype=float).ravel()

        # Correlate each row with the query directly instead of building the
        # full (n + 1) x (n + 1) matrix np.corrcoef would produce.
        corpus_centered = corpus - corpus.mean(axis=1, keepdims=True)
        query_centered = query - query.mean()
        covariance = corpus_centered @ query_centered
        scale = np.linalg.norm(corpus_centered, axis=1) * np.linalg.norm(query_centered)
        scores = np.divide(
            covariance,
            scale,
            out=np.zeros(covariance.shape, dtype=float),
            where=scale != 0,
        )
        return self._top_matches(scores, "pearson_ver", top_n)