-
Notifications
You must be signed in to change notification settings - Fork 0
/
writers.py
58 lines (48 loc) · 2.18 KB
/
writers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import csv
import numpy as np
import utility as util
"""
Write to movie_avg_rating.csv
1st column: movie index
2nd column: avg rating
"""
def write_user_avg_rating():
    """Write every user's average rating to resources/user_avg_rating.csv.

    Loads resources/users.csv and resources/ratings.csv through
    util.import_data (';' delimiter). Column 0 of a users row is read as
    the user index; column 0 of a ratings row is read as the user index
    and column 2 as the rating value.

    One ';'-delimited row is written per user: the user index followed by
    the mean of that user's ratings rounded to 5 decimals. A user without
    any rating gets ``nan`` (the value np.mean yields for an empty list),
    so the output keeps one row per user.
    """
    user_data = util.import_data('resources/users.csv', ';')
    ratings_data = util.import_data('resources/ratings.csv', ';')

    # Group the raw rating values by user in a single pass so the ratings
    # are not rescanned once per user.
    ratings_by_user = {}
    for rating_row in ratings_data:
        ratings_by_user.setdefault(int(rating_row[0]), []).append(rating_row[2])

    with open('resources/user_avg_rating.csv', 'w', newline='') as avg_rating_file:
        avg_rating_writer = csv.writer(avg_rating_file, delimiter=';')
        for user_row in user_data:
            user_index = int(user_row[0])
            raw_ratings = ratings_by_user.get(user_index)
            if raw_ratings:
                avg_rating = float(np.mean([float(value) for value in raw_ratings]))
            else:
                # Same value np.mean([]) would produce, without its
                # "mean of empty slice" RuntimeWarning.
                avg_rating = float('nan')
            avg_rating_writer.writerow([user_index, round(avg_rating, 5)])
"""
Write to movie_avg_rating.csv
1st column: movie index
2nd column: avg rating
"""
def write_movie_avg_ratings():
    """Write every movie's average rating to resources/movie_avg_rating.csv.

    Loads resources/movies.csv and resources/ratings.csv through
    util.import_data (';' delimiter). Column 0 of a movies row is read as
    the movie index; column 1 of a ratings row is read as the movie index
    and column 2 as the rating value.

    One ';'-delimited row is written per movie: the movie index followed by
    the mean of that movie's ratings rounded to 5 decimals. A movie without
    any rating gets ``nan`` (the value np.mean yields for an empty list),
    so the output keeps one row per movie.
    """
    movie_data = util.import_data('resources/movies.csv', ';')
    ratings_data = util.import_data('resources/ratings.csv', ';')

    # Group the raw rating values by movie in a single pass so the ratings
    # are not rescanned once per movie.
    ratings_by_movie = {}
    for rating_row in ratings_data:
        ratings_by_movie.setdefault(int(rating_row[1]), []).append(rating_row[2])

    with open('resources/movie_avg_rating.csv', 'w', newline='') as avg_rating_file:
        avg_rating_writer = csv.writer(avg_rating_file, delimiter=';')
        for movie_row in movie_data:
            movie_index = int(movie_row[0])
            raw_ratings = ratings_by_movie.get(movie_index)
            if raw_ratings:
                avg_rating = float(np.mean([float(value) for value in raw_ratings]))
            else:
                # Same value np.mean([]) would produce, without its
                # "mean of empty slice" RuntimeWarning.
                avg_rating = float('nan')
            avg_rating_writer.writerow([movie_index, round(avg_rating, 5)])
def write_k_neighbour(*, start_index=3256, similarity_threshold=0.5, k=10):
    """Append k-nearest-neighbour rows to resources/k_neighbours.csv.

    For every movie index from ``start_index`` up to and including the
    number of rows in resources/movies.csv, calls util.find_k_neighbours
    and writes whatever it returns as one ';'-delimited row.

    The output file is opened in append mode, so rows written by earlier
    runs are kept and new rows are added after them.

    Args:
        start_index: First movie index to process. The default 3256 is the
            value that used to be hard-coded here -- presumably the point
            where an earlier, interrupted run stopped (TODO confirm). Pass
            a different value to start elsewhere.
        similarity_threshold: Forwarded unchanged to util.find_k_neighbours.
        k: Forwarded unchanged to util.find_k_neighbours.
    """
    movie_avg_ratings_data = util.import_data('resources/movie_avg_rating.csv', ';')
    ratings_data = util.import_data('resources/ratings.csv', ';')
    movie_data = util.import_data('resources/movies.csv', ';')
    movies_len = len(movie_data)

    with open('resources/k_neighbours.csv', 'a', newline='') as k_neighbours_file:
        k_neighbours_writer = csv.writer(k_neighbours_file, delimiter=';')
        # The upper bound is inclusive: indices run up to movies_len itself
        # (looks like movie indices are 1-based -- verify against the data).
        for movie_index in range(start_index, movies_len + 1):
            movie_x_and_k_neighbours = util.find_k_neighbours(
                x_movie_index=movie_index,
                movies_len=movies_len,
                movie_avg_ratings_data=movie_avg_ratings_data,
                ratings_data=ratings_data,
                similarity_threshold=similarity_threshold,
                k=k,
            )
            k_neighbours_writer.writerow(movie_x_and_k_neighbours)