generated from JadeKim042386/Deployment-fastapi-with-flyio-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
67 lines (47 loc) · 1.57 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import numpy as np
import os.path as osp
from numpy import dot
from numpy.linalg import norm
from transformers import AutoTokenizer
from models import ProductCategory
from config import PROPERTIES
# Base directory for the file helpers in this module: every relative path
# passed to save_numpy, load_numpy and FileUtils.existsFile is joined onto it.
LOCAL_FILE_PATH = PROPERTIES['LOCAL_FILE_PATH']
# The bare string below is Korean for "cosine similarity"; it labels cos_sim.
"""코사인 유사도"""
def cos_sim(A, B):
    """Return the cosine similarity of ``A`` and ``B``.

    Computed as ``dot(A, B) / (norm(A) * norm(B))``.

    Args:
        A: First vector (array-like of numbers).
        B: Second vector, same length as ``A``.

    Returns:
        The similarity as a float. ``0.0`` is returned when either input has
        zero norm, where the ratio is undefined; the plain division would
        yield ``nan`` and emit a NumPy ``RuntimeWarning``.
    """
    scale = norm(A) * norm(B)
    if scale == 0:
        # At least one vector is all zeros: no direction to compare.
        return 0.0
    return dot(A, B) / scale
def save_numpy(path, arr):
    """Write ``arr`` in NumPy's binary format to a file under LOCAL_FILE_PATH.

    Args:
        path: File location relative to ``LOCAL_FILE_PATH``.
        arr: Array data handed to ``np.save``.
    """
    target = osp.join(LOCAL_FILE_PATH, path)
    # An open handle (rather than a filename) is passed to np.save, so the
    # file is written exactly at ``target``.
    with open(target, 'wb') as handle:
        np.save(handle, arr)
def load_numpy(path):
    """Load the NumPy data stored in a file under LOCAL_FILE_PATH.

    Args:
        path: File location relative to ``LOCAL_FILE_PATH``.

    Returns:
        Whatever ``np.load`` produces for that file.
    """
    source = osp.join(LOCAL_FILE_PATH, path)
    with open(source, 'rb') as handle:
        # NOTE(review): allow_pickle=True permits unpickling object arrays,
        # which can run arbitrary code; only load files this app wrote itself.
        return np.load(handle, allow_pickle=True)
class SingletonInstane:
    """Base class that gives each subclass one lazily created shared instance.

    Call ``SomeClass.instance(...)`` instead of the constructor. The first
    call on a given class builds the object with the supplied arguments and
    caches it on that class; later calls return the cached object and ignore
    any arguments.

    The cache is looked up in the class's own namespace only, so a subclass
    of an already-instantiated class still gets its own instance instead of
    inheriting its parent's.
    """

    __instance = None

    @classmethod
    def __getInstance(cls):
        """Return the instance cached on ``cls`` itself, or ``None``."""
        # vars(cls) holds only attributes set directly on ``cls``; the key is
        # the name-mangled form of ``__instance`` as written in this class.
        return vars(cls).get("_SingletonInstane__instance")

    @classmethod
    def instance(cls, *args, **kargs):
        """Return the shared instance of ``cls``, creating it on first use.

        Args:
            *args: Positional arguments for ``cls(...)``; used on the first
                call only.
            **kargs: Keyword arguments for ``cls(...)``; used on the first
                call only.

        Returns:
            The single cached instance of ``cls``.
        """
        existing = cls.__getInstance()
        if existing is None:
            existing = cls(*args, **kargs)
            cls.__instance = existing
        return existing
class Tokenizer(SingletonInstane):
    """Builds fixed-length indicator vectors from text and a category index.

    The vector has one slot per token id of the ``xlm-roberta-base``
    vocabulary followed by one slot per ``ProductCategory`` entry.
    """

    # Loaded once, when the class body executes, and shared by all instances.
    _tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

    def __init__(self):
        return

    def get_vector(self, content, category_idx):
        """Return the vector for ``content`` tagged with ``category_idx``.

        Args:
            content: Text passed to the tokenizer's ``encode``.
            category_idx: Integer index that is marked alongside the token ids.

        Returns:
            An integer array of the length given by ``get_empty_vector`` with
            a 1 at every index that occurred.
        """
        vector = self.get_empty_vector()
        # squeeze(axis=0) drops the leading length-1 axis of the encoded ids.
        token_ids = self.__encode(content).squeeze(axis=0)
        # NOTE(review): category_idx is used as a raw index, although
        # get_empty_vector reserves the slots *after* vocab_size for
        # categories; it may need to be offset by vocab_size. Confirm against
        # the callers before changing it.
        indices = np.append(token_ids, np.array([category_idx]), axis=0)
        # With index-array assignment a repeated index is incremented only
        # once, so entries end up 0 or 1 rather than occurrence counts.
        vector[indices] += 1
        return vector

    def get_empty_vector(self):
        """Return an all-zero integer vector of vocabulary + category length."""
        size = Tokenizer._tokenizer.vocab_size + ProductCategory.__len__()
        # Allocate directly instead of building a vocabulary-sized Python list
        # of zeros on every call.
        return np.zeros(size, dtype=int)

    def __encode(self, content):
        """Encode ``content`` to token ids as a NumPy array."""
        return Tokenizer._tokenizer.encode(content, return_tensors='np')
class FileUtils:
    """Namespace for file-system helpers that work relative to LOCAL_FILE_PATH."""

    def __init__(self):
        pass

    @staticmethod
    def existsFile(path):
        """Tell whether ``path`` names a regular file under LOCAL_FILE_PATH.

        Args:
            path: File location relative to ``LOCAL_FILE_PATH``.

        Returns:
            The result of ``os.path.isfile`` for the joined path.
        """
        candidate = osp.join(LOCAL_FILE_PATH, path)
        return osp.isfile(candidate)