| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
'''Sample baseline averaging algorithms.''' |
|---|
| 4 |
|
|---|
| 5 |
import numpy as N |
|---|
| 6 |
from pyflix.algorithms import Algorithm |
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
class MovieAverage(Algorithm):
    '''Baseline predictor: every user gets the mean of all votes cast for
    the movie in question.

    This algorithm returns an RMSE score of 1.0528 on the scrubbed dataset.
    '''

    def __init__(self, training_set):
        '''Create the empty per-movie cache, then delegate to the next
        initializer in the MRO with C{training_set}.
        '''
        # The cache is created before the parent initializer runs.
        self._movie_averages = {}
        super(MovieAverage, self).__init__(training_set)

    def __call__(self, movie_id, user_id):
        '''Return the average rating of C{movie_id}.

        C{user_id} is accepted but not used by this predictor. The average
        is computed once per movie and then served from the cache.
        '''
        known = self._movie_averages
        if movie_id in known:
            return known[movie_id]

        # Cache miss: pull the movie's ratings from the training set.
        # NOTE(review): self._training_set is not assigned in this class;
        # presumably the Algorithm base initializer sets it -- confirm.
        ratings = self._training_set.movie(movie_id).ratings()
        mean = N.average(ratings)
        known[movie_id] = mean
        return mean
|---|
| 26 |
|
|---|
| 27 |
|
|---|
| 28 |
class UserAverage(Algorithm):
    '''Baseline predictor: every movie gets the mean of all votes cast by
    the user in question.

    This algorithm returns an RMSE score of 1.0688 on the scrubbed dataset.
    '''

    def __init__(self, training_set):
        '''Create the empty per-user cache, then delegate to the next
        initializer in the MRO with C{training_set}.
        '''
        # The cache is created before the parent initializer runs.
        self._user_averages = {}
        super(UserAverage, self).__init__(training_set)

    def __call__(self, movie_id, user_id):
        '''Return the average rating given by C{user_id}.

        C{movie_id} is accepted but not used by this predictor. The average
        is computed once per user and then served from the cache.
        '''
        known = self._user_averages
        if user_id in known:
            return known[user_id]

        # Cache miss: pull the user's ratings from the training set.
        # NOTE(review): self._training_set is not assigned in this class;
        # presumably the Algorithm base initializer sets it -- confirm.
        ratings = self._training_set.user(user_id).ratings()
        mean = N.average(ratings)
        known[user_id] = mean
        return mean
|---|
| 45 |
|
|---|
| 46 |
|
|---|
| 47 |
class DoubleAverage(MovieAverage, UserAverage):
    '''Predicts the midpoint between the L{MovieAverage} and the
    L{UserAverage} predictions.

    This algorithm returns an RMSE score of 1.0158 on the scrubbed dataset.
    '''

    # No __init__ is defined here, so construction goes through the
    # inherited initializers.

    def __call__(self, movie_id, user_id):
        '''Return half the sum of the movie-average and user-average
        predictions for the given C{movie_id} / C{user_id} pair.
        '''
        # Each parent is invoked explicitly by name, not through super().
        by_movie = MovieAverage.__call__(self, movie_id, user_id)
        by_user = UserAverage.__call__(self, movie_id, user_id)
        return (by_movie + by_user) / 2
|---|