diff --git a/redshells/app/word_item_similarity/dimension_reduction_model.py b/redshells/app/word_item_similarity/dimension_reduction_model.py index ab8a028..acf070a 100644 --- a/redshells/app/word_item_similarity/dimension_reduction_model.py +++ b/redshells/app/word_item_similarity/dimension_reduction_model.py @@ -12,6 +12,7 @@ class DimensionReductionModel(object): """ Reduce the dimension of vector values with respect to its importance. The importance is calculated by sum of squared values. """ + def __init__(self, dimension_size: int) -> None: self.dimension_size = dimension_size self.top_n_indices = None diff --git a/redshells/contrib/model/early_stopping.py b/redshells/contrib/model/early_stopping.py index e68a980..f1442e1 100644 --- a/redshells/contrib/model/early_stopping.py +++ b/redshells/contrib/model/early_stopping.py @@ -11,6 +11,7 @@ class EarlyStopping(object): + def __init__(self, try_count=1, learning_rate=0., decay_speed=2.0, threshold=0.001, save_directory: str = None): self._save_path = os.path.join(save_directory, 'model.ckpt') if save_directory else None self._try_count = try_count diff --git a/redshells/contrib/model/factorization_machine.py b/redshells/contrib/model/factorization_machine.py index 53458bb..97aa842 100644 --- a/redshells/contrib/model/factorization_machine.py +++ b/redshells/contrib/model/factorization_machine.py @@ -18,6 +18,7 @@ class FactorizationMachineGraph(object): + def __init__(self, input_size: int, feature_kind_size: int, embedding_size: int, l2_weight: float, learning_rate: float, scope_name: str = '') -> None: with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE): @@ -59,6 +60,7 @@ class FactorizationMachine(sklearn.base.BaseEstimator): For details of the algorithm, see "Factorization Machines" which is available at https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf """ + def __init__(self, embedding_size: int, l2_weight: float, diff --git a/redshells/contrib/model/feature_aggregation_similarity_model.py b/redshells/contrib/model/feature_aggregation_similarity_model.py index 8c2806e..31d2ccf 100644 --- a/redshells/contrib/model/feature_aggregation_similarity_model.py +++ b/redshells/contrib/model/feature_aggregation_similarity_model.py @@ -14,6 +14,7 @@ class FeatureAggregationSimilarityDataset(tf.keras.utils.Sequence): + def __init__(self, x_item_indices: np.ndarray, y_item_indices: np.ndarray, @@ -71,6 +72,7 @@ def __len__(self): class Average(tf.keras.layers.Layer): + def __init__(self, **kwargs): super(Average, self).__init__(**kwargs) @@ -94,6 +96,7 @@ def from_config(cls, config): class Clip(tf.keras.layers.Layer): + def __init__(self, **kwargs): super(Clip, self).__init__(**kwargs) @@ -116,6 +119,7 @@ def from_config(cls, config): class FeatureAggregationSimilarityGraph(object): + def __init__(self, feature_size: int, embedding_size: int, @@ -165,6 +169,7 @@ def clip(x): class FeatureAggregationSimilarityModel(object): + def __init__( self, embedding_size: int, diff --git a/redshells/contrib/model/gcmc_dataset.py b/redshells/contrib/model/gcmc_dataset.py index fa18587..03475b1 100644 --- a/redshells/contrib/model/gcmc_dataset.py +++ b/redshells/contrib/model/gcmc_dataset.py @@ -12,6 +12,7 @@ class GcmcDataset(object): + def __init__(self, user_ids: np.ndarray, item_ids: np.ndarray, @@ -28,6 +29,7 @@ def __init__(self, self.item_features = item_features def filter(self, user_ids: Set, item_ids: Set, ratings: Set): + def _is_valid(u, i, r): return u in user_ids and i in item_ids and r in ratings @@ -39,6 +41,7 @@ def _is_valid(u, i, r): class GcmcIdMap(object): + def __init__(self, ids: np.ndarray, features: Optional[List[Dict[Any, np.ndarray]]] = None, @@ -150,6 +153,7 @@ def _sort_features(cls, features: List[Dict[Any, np.ndarray]], order_map: Dict) class GcmcGraphDataset(object): + def __init__(self, dataset: GcmcDataset, test_size: float, min_user_click_count: int = 0, max_user_click_count: int = sys.maxsize) -> None: self._user = GcmcIdMap(dataset.user_ids, features=dataset.user_features, min_count=min_user_click_count, max_count=max_user_click_count) self._item = GcmcIdMap(dataset.item_ids, features=dataset.item_features) diff --git a/redshells/contrib/model/graph_convolutional_matrix_completion.py b/redshells/contrib/model/graph_convolutional_matrix_completion.py index 067ca5e..d9e7149 100644 --- a/redshells/contrib/model/graph_convolutional_matrix_completion.py +++ b/redshells/contrib/model/graph_convolutional_matrix_completion.py @@ -32,6 +32,7 @@ def _convert_sparse_matrix_to_sparse_tensor(x): class GraphConvolutionalMatrixCompletionGraph(object): + def __init__(self, n_rating: int, n_user: int, @@ -216,6 +217,7 @@ def _to_constant(x): class GraphConvolutionalMatrixCompletion(object): + def __init__(self, graph_dataset: GcmcGraphDataset, encoder_hidden_size: int, diff --git a/redshells/contrib/model/matrix_factorization_model.py b/redshells/contrib/model/matrix_factorization_model.py index 82b2af7..b5cbc06 100644 --- a/redshells/contrib/model/matrix_factorization_model.py +++ b/redshells/contrib/model/matrix_factorization_model.py @@ -16,6 +16,7 @@ class MatrixFactorizationGraph(object): + def __init__(self, n_items: int, n_users: int, n_latent_factors: int, n_services: int, reg_item: float, reg_user: float, scope_name: str, use_l2_upper_regularization: bool, average: float, standard_deviation: float) -> None: # placeholder @@ -91,6 +92,7 @@ def __init__(self, n_items: int, n_users: int, n_latent_factors: int, n_services class MatrixFactorization(object): + def __init__(self, n_latent_factors: int, learning_rate: float, diff --git a/redshells/factory/optuna_param_factory.py b/redshells/factory/optuna_param_factory.py index c39e335..1c83cb0 100644 --- a/redshells/factory/optuna_param_factory.py +++ b/redshells/factory/optuna_param_factory.py @@ -63,6 +63,7 @@ def _catboostclassifier_default(trial: optuna.trial.Trial): class _OptunaParamFactory(metaclass=Singleton): + def __init__(self): self._rules = dict() self._rules['XGBClassifier_default'] = _xgbclassifier_default diff --git a/redshells/factory/prediction_model_factory.py b/redshells/factory/prediction_model_factory.py index cfc4933..571852d 100644 --- a/redshells/factory/prediction_model_factory.py +++ b/redshells/factory/prediction_model_factory.py @@ -4,6 +4,7 @@ class _PredictionModelFactory(metaclass=Singleton): + def __init__(self): self._models = dict() try: diff --git a/redshells/model/lda_model.py b/redshells/model/lda_model.py index 4f25724..f30cd2b 100644 --- a/redshells/model/lda_model.py +++ b/redshells/model/lda_model.py @@ -12,6 +12,7 @@ class LdaModel(object): """TopicModel is a kind of wrapper of LdaModel in gensim module. """ + def __init__(self, n_topics: int, chunksize: int = 16, decay: float = 0.5, offset: int = 16, iterations: int = 3, eta: float = 1.0e-16) -> None: self.n_topics = n_topics self.chunksize = chunksize diff --git a/redshells/model/scdv.py b/redshells/model/scdv.py index f973e8b..97cc48f 100644 --- a/redshells/model/scdv.py +++ b/redshells/model/scdv.py @@ -17,6 +17,7 @@ class SCDV(object): See https://arxiv.org/pdf/1612.06778.pdf for details """ + def __init__(self, documents: List[List[str]], cluster_size: int, sparsity_percentage: float, gaussian_mixture_kwargs: Dict[Any, Any], dictionary: gensim.corpora.Dictionary, w2v: Union[FastText, Word2Vec]) -> None: """ diff --git a/redshells/model/tdidf.py b/redshells/model/tdidf.py new file mode 100644 index 0000000..2844aed --- /dev/null +++ b/redshells/model/tdidf.py @@ -0,0 +1,3 @@ +# for pickle backward compatibility +# https://github.com/m3dev/redshells/pull/56#discussion_r711488588 +from redshells.model.tfidf import Tfidf diff --git a/redshells/model/tfidf.py b/redshells/model/tfidf.py index 1fe7f77..930a92a 100644 --- a/redshells/model/tfidf.py +++ b/redshells/model/tfidf.py @@ -9,6 +9,7 @@ class Tfidf(object): + def __init__(self, dictionary: gensim.corpora.Dictionary, tokens: List[List[str]]) -> None: self.dictionary = dictionary self.tfidf = gensim.models.TfidfModel([dictionary.doc2bow(t) for t in tokens]) diff --git a/redshells/train/utils/token_iterator.py b/redshells/train/utils/token_iterator.py index f399137..31183a7 100644 --- a/redshells/train/utils/token_iterator.py +++ b/redshells/train/utils/token_iterator.py @@ -5,6 +5,7 @@ class TokenIterator(object): + def __init__(self, texts: List[str]) -> None: self.texts = texts self.i = 0 diff --git a/test/contrib/model/test_feature_aggregation_similarity_model.py b/test/contrib/model/test_feature_aggregation_similarity_model.py index 6d438fc..fbab2aa 100644 --- a/test/contrib/model/test_feature_aggregation_similarity_model.py +++ b/test/contrib/model/test_feature_aggregation_similarity_model.py @@ -10,6 +10,7 @@ class TestFeatureAggregationSimilarityModel(unittest.TestCase): + def test(self): model = FeatureAggregationSimilarityModel(embedding_size=7, learning_rate=0.001, feature_size=2, item_size=4, max_feature_index=5) dataset = FeatureAggregationSimilarityDataset(x_item_indices=np.array([0, 1, 2]), diff --git a/test/contrib/model/test_gcmc_graph_dataset.py b/test/contrib/model/test_gcmc_graph_dataset.py index 0b5dd1b..5e62326 100644 --- a/test/contrib/model/test_gcmc_graph_dataset.py +++ b/test/contrib/model/test_gcmc_graph_dataset.py @@ -13,6 +13,7 @@ def _make_sparse_matrix(n, m, n_values): class TestGcmcGraphDataset(unittest.TestCase): + def test(self): # This tests that GraphConvolutionalMatrixCompletion runs without error, and its loss and rmse are small enough. n_users = 101 diff --git a/test/contrib/model/test_gcmc_graph_dataset_map.py b/test/contrib/model/test_gcmc_graph_dataset_map.py index 4359858..8f9f486 100644 --- a/test/contrib/model/test_gcmc_graph_dataset_map.py +++ b/test/contrib/model/test_gcmc_graph_dataset_map.py @@ -9,6 +9,7 @@ class TestGcmcGraphDataset(unittest.TestCase): + def setUp(self) -> None: dataset = GcmcDataset(user_ids=np.array([0, 1, 2]), item_ids=np.array([10, 11, 12]), ratings=np.array([100, 101, 102])) self.graph_dataset = GcmcGraphDataset(dataset=dataset, test_size=0.1) diff --git a/test/contrib/model/test_gcmc_id_map.py b/test/contrib/model/test_gcmc_id_map.py index e91d0ac..bbdc8a0 100644 --- a/test/contrib/model/test_gcmc_id_map.py +++ b/test/contrib/model/test_gcmc_id_map.py @@ -7,6 +7,7 @@ class TestGcmcIdMap(unittest.TestCase): + def test_initialize(self): ids = np.array([0, 0, 1, 2, 3, 3]) features = [{0: np.array([0]), 1: np.array([1])}] diff --git a/test/contrib/model/test_graph_convolutional_matrix_completion.py b/test/contrib/model/test_graph_convolutional_matrix_completion.py index c5d957f..f3ec3dd 100644 --- a/test/contrib/model/test_graph_convolutional_matrix_completion.py +++ b/test/contrib/model/test_graph_convolutional_matrix_completion.py @@ -15,6 +15,7 @@ def _make_sparse_matrix(n, m, n_values): class GraphConvolutionalMatrixCompletionTest(unittest.TestCase): + def test_run(self): # This tests that GraphConvolutionalMatrixCompletion runs without error, and its loss and rmse are small enough. n_users = 101 diff --git a/test/contrib/train/test_gcmc_dataset.py b/test/contrib/train/test_gcmc_dataset.py index 576a0c3..26eacbf 100644 --- a/test/contrib/train/test_gcmc_dataset.py +++ b/test/contrib/train/test_gcmc_dataset.py @@ -10,6 +10,7 @@ class TestGCMCDataset(unittest.TestCase): + def test_without_information(self): user_ids = np.array([1, 1, 2, 2, 2]) item_ids = np.array([1, 2, 1, 2, 3]) diff --git a/test/contrib/train/test_train_feature_aggregation_similarity_model.py b/test/contrib/train/test_train_feature_aggregation_similarity_model.py index 69e0408..389cbbd 100644 --- a/test/contrib/train/test_train_feature_aggregation_similarity_model.py +++ b/test/contrib/train/test_train_feature_aggregation_similarity_model.py @@ -18,6 +18,7 @@ class _DummyTask(gokart.TaskOnKart): class TestTrainFeatureAggregationSimilarityModel(unittest.TestCase): + def setUp(self): self.input_data = None self.dump_data = None diff --git a/test/model/test_tfidf.py b/test/model/test_tfidf.py index 455e5df..0dd42cd 100644 --- a/test/model/test_tfidf.py +++ b/test/model/test_tfidf.py @@ -11,6 +11,7 @@ class TestTfidf(unittest.TestCase): + def test_apply_with_empty(self): texts = [random.choices(string.ascii_letters, k=100) for _ in range(100)] dictionary = gensim.corpora.Dictionary(texts) diff --git a/test/train/test_train_doc2vec.py b/test/train/test_train_doc2vec.py index 25497bb..37e4ebf 100644 --- a/test/train/test_train_doc2vec.py +++ b/test/train/test_train_doc2vec.py @@ -12,6 +12,7 @@ class _DummyTask(luigi.Task): class TrainDoc2VecTest(unittest.TestCase): + def setUp(self): self.input_data = None self.dump_data = None diff --git a/test/train/test_train_fasttext.py b/test/train/test_train_fasttext.py index 44cdd86..0968a71 100644 --- a/test/train/test_train_fasttext.py +++ b/test/train/test_train_fasttext.py @@ -11,6 +11,7 @@ class _DummyTask(gokart.TaskOnKart): class TrainFastTextTest(unittest.TestCase): + def setUp(self): self.input_data = None self.dump_data = None diff --git a/test/train/test_train_lda_model.py b/test/train/test_train_lda_model.py index 492c9ab..9aa8f47 100644 --- a/test/train/test_train_lda_model.py +++ b/test/train/test_train_lda_model.py @@ -12,6 +12,7 @@ class _DummyTask(luigi.Task): class TrainLdaModelTest(unittest.TestCase): + def setUp(self): self.input_data = None self.dump_data = None diff --git a/test/train/test_train_pairwise_similarity_model.py b/test/train/test_train_pairwise_similarity_model.py index 9dbcb40..83ab544 100644 --- a/test/train/test_train_pairwise_similarity_model.py +++ b/test/train/test_train_pairwise_similarity_model.py @@ -13,6 +13,7 @@ class _DummyTask(luigi.Task): class TrainPairwiseSimilarityModelTest(unittest.TestCase): + def setUp(self): self.input_data = dict() self.dump_data = None diff --git a/test/train/test_train_word2vec.py b/test/train/test_train_word2vec.py index 49dbfd6..a4072ee 100644 --- a/test/train/test_train_word2vec.py +++ b/test/train/test_train_word2vec.py @@ -12,6 +12,7 @@ class _DummyTask(luigi.Task): class TrainWord2VecTest(unittest.TestCase): + def setUp(self): self.input_data = None self.dump_data = None