Merge branch 'release/release_v0.3.5'
evfro committed Jul 23, 2017
2 parents affceac + 20317b7 commit 3283d40
Showing 13 changed files with 1,041 additions and 359 deletions.
38 changes: 14 additions & 24 deletions README.md
@@ -33,9 +33,10 @@ A special effort was made to make a *recsys for humans*, which stresses on the e…
from polara.recommender.data import RecommenderData
from polara.recommender.models import SVDModel
from polara.tools.movielens import get_movielens_data

# get data and convert it into appropriate format
ml_data = get_movielens_data(get_genres=False)
data_model = RecommenderData(ml_data, 'userid', 'movieid', 'rating')
# build PureSVD model and evaluate it
svd = SVDModel(data_model)
svd.build()
svd.evaluate()
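
As a side note to the quick-start snippet: once `evaluate()` has run, the same model object can also be asked for the actual top-n lists. A minimal sketch follows; it assumes the generic `RecommenderModel` interface described in the next section (`get_recommendations()`) plus a `topk` attribute, neither of which is shown in this commit.
```python
# sketch only: assumes SVDModel inherits get_recommendations() from RecommenderModel
# and exposes a topk attribute controlling the list length (assumption, not shown here)
svd.topk = 10                      # number of items to recommend per test user
recs = svd.get_recommendations()   # recommended item ids for every test user
```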
@@ -44,44 +45,33 @@ svd.evaluate()
## Creating new recommender models
Basic models can be extended by subclassing `RecommenderModel` class and defining two required methods: `self.build()` and `self.get_recommendations()`. Here's an example of a simple item-to-item recommender model:
```python
-import scipy as sp
-import scipy.sparse
-from scipy.sparse import csr_matrix
-import numpy as np
from polara.recommender.models import RecommenderModel

class CooccurrenceModel(RecommenderModel):
    def __init__(self, *args, **kwargs):
        super(CooccurrenceModel, self).__init__(*args, **kwargs)
-        self.method = 'item-to-item' #pick some meaningful name
+        self.implicit = True # will convert feedback values to all ones
+        self.method = 'item-to-item' # pick some meaningful name

    def build(self):
-        self._recommendations = None
-        idx, val, shp = self.data.to_coo()
-        if self.implicit:
-            val = np.ones_like(val)
-        user_item_matrix = csr_matrix((val, (idx[:, 0], idx[:, 1])),
-                                      shape=shp, dtype=np.float64)
-
-        i2i_matrix = user_item_matrix.T.dot(user_item_matrix)
-        #exclude "self-links"
-        diag_vals = i2i_matrix.diagonal()
-        i2i_matrix -= sp.sparse.dia_matrix((diag_vals, 0), shape=i2i_matrix.shape)
+        # build model - calculate item-to-item matrix
+        user_item_matrix = self.get_training_matrix()
+        # rating matrix product R^T R gives cooccurrences count
+        i2i_matrix = user_item_matrix.T.dot(user_item_matrix) # gives CSC format
+        i2i_matrix.setdiag(0) # exclude "self-links"
+        i2i_matrix.eliminate_zeros() # ensure only non-zero elements are stored
+        # store matrix for generating recommendations
        self._i2i_matrix = i2i_matrix

    def get_recommendations(self):
-        test_data = self.data.test_to_coo()
-        test_shape = self.data.get_test_shape()
+        # get test users information and generate top-k recommendations
+        test_data, test_shape = self._get_test_data()
        test_matrix, _ = self.get_test_matrix(test_data, test_shape)
-        if self.implicit:
-            test_matrix.data = np.ones_like(test_matrix.data)
-
+        # calculate predicted scores
        i2i_scores = test_matrix.dot(self._i2i_matrix)
        if self.filter_seen:
            # prevent seen items from appearing in recommendations
            self.downvote_seen_items(i2i_scores, test_data)
-
+        # generate top-k recommendations for every test user
        top_recs = self.get_topk_items(i2i_scores)
        return top_recs
```
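
A short usage sketch may help tie the example back to the quick-start snippet above; it simply feeds the same `RecommenderData` object into the new class. Nothing in this block is part of the commit itself.
```python
# sketch: plug the custom model into the same data pipeline as the quick-start example
from polara.recommender.data import RecommenderData
from polara.tools.movielens import get_movielens_data

ml_data = get_movielens_data(get_genres=False)
data_model = RecommenderData(ml_data, 'userid', 'movieid', 'rating')

i2i = CooccurrenceModel(data_model)  # the class defined above
i2i.build()                          # computes and stores the item-to-item matrix
i2i.evaluate()                       # same evaluation call as in the quick-start example
```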
31 changes: 7 additions & 24 deletions polara/evaluation/evaluation_engine.py
@@ -47,11 +47,6 @@ def build_models(models):
        model.build()


-def refresh_models(models):
-    for model in models:
-        model._recommendations = None
-
-
def consolidate(scores, params, metrics):
    res = {}
    for i, metric in enumerate(metrics):
@@ -72,45 +67,32 @@ def consolidate_folds(scores, folds, metrics, index_names = ['fold', 'top-n']):
def holdout_test_pair(model1, model2, holdout_sizes=[1], metrics=['hits']):
    holdout_scores = []
    models = [model1, model2]
-    check_updates(models)

    data1 = model1.data
    data2 = model2.data
    for i in holdout_sizes:
        print i,
        data1.holdout_size = i
-        data2.holdout_size = i
-        data1.update()
+        data1.update()
+        data2.holdout_size = i
        data2.update()

-        refresh_models(models)
        metric_scores = evaluate_models(models, metrics)
        holdout_scores.append(metric_scores)

    return consolidate(holdout_scores, holdout_sizes, metrics)


-def check_updates(models):
-    data = models[0].data
-    if data.has_changed: #Rebuild models entirely
-        print 'Data has been changed. Rebuiding the models.'
-        build_models(models)
-    elif data.has_updated: #just force recommendations renewal
-        print 'Test data has been updated. Refreshing the models.'
-        refresh_models(models)
-
-
def holdout_test(models, holdout_sizes=[1], metrics=['hits']):
-    #check_updates(models) #will rebuild or renew models if data was manipulated in previous experiments
    holdout_scores = []
    data = models[0].data
+    assert all([model.data is data for model in models[1:]]) #check that data is shared across models

    build_models(models)
    for i in holdout_sizes:
        print i,
        data.holdout_size = i
-        data.update() #can be omitted but it's more safe
-        refresh_models(models) #test data is updated - clear old recommendations
+        data.update()

        metric_scores = evaluate_models(models, metrics)
        holdout_scores.append(metric_scores)
@@ -121,11 +103,12 @@ def holdout_test(models, holdout_sizes=[1], metrics=['hits']):
def topk_test(models, topk_list=[10], metrics=['hits']):
    topk_scores = []
    data = models[0].data
-    data.update()
-    check_updates(models) #will rebuild or renew models if data was manipulated in previous experiments
+    assert all([model.data is data for model in models[1:]]) #check that data is shared across models

+    data.update()
    topk_list = list(reversed(sorted(topk_list))) #start from max topk and rollback

+    build_models(models)
    for topk in topk_list:
        print topk,
        metric_scores = evaluate_models(models, metrics, topk)
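
To make the reshaped API above concrete, here is a hedged sketch of how the helpers might be driven after this commit: every model must now share one data object (enforced by the new `assert`), and `holdout_test`/`topk_test` call `build_models` themselves. The model and data setup is assumed to come from the README snippets; the module path is taken from the file header above.
```python
# sketch of driving the updated evaluation helpers (not part of the commit)
from polara.evaluation import evaluation_engine as ee

# all models must be built on the same data instance, as the new asserts require;
# data_model, SVDModel and CooccurrenceModel are assumed from the README snippets
models = [SVDModel(data_model), CooccurrenceModel(data_model)]

holdout_scores = ee.holdout_test(models, holdout_sizes=[1, 3, 5], metrics=['hits'])
topk_scores = ee.topk_test(models, topk_list=[1, 5, 10], metrics=['hits'])
```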