Model Based Recommender System¶
Model-based recommendation is a type of recommendation system that relies on building a predictive model based on the user-item interactions or other relevant features present in the dataset. Unlike memory-based collaborative filtering techniques that directly use the user-item interactions, model-based approaches create a model to predict user preferences and provide recommendations based on this model.
References:
In [4]:
# Record which interpreter is running this notebook (version and executable path).
import sys
print(sys.version)
print(sys.executable)
# Data-handling libraries plus `surprise`, the recommender library used below.
import pandas as pd
import numpy as np
import scipy
import surprise
# Load the `watermark` IPython extension and print the versions of the
# imported packages (`-iv`).
%load_ext watermark
%watermark -iv
In [2]:
# Colab-only setup: download the goodbooks-10k `books.csv` and `ratings.csv`
# into the current working directory, then reinstall numpy (pinned to 1.19.5)
# and scikit-surprise.
# NOTE(review): indentation was lost in this export; the `!` shell commands
# below presumably all belong inside the `if` branch -- confirm against the
# original notebook.
if 'google.colab' in sys.modules:
!wget -O books.csv "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv"
!wget -O ratings.csv "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv"
!pip uninstall numpy -y
!pip install numpy==1.19.5
!pip uninstall scikit-surprise -y
!pip install scikit-surprise
In [3]:
from surprise import Reader, SVD, Dataset, accuracy
from surprise.model_selection import GridSearchCV, train_test_split, cross_validate
Data¶
In [5]:
from pathlib import Path

# Locate the goodbooks-10k CSV files. On Colab the setup cell downloads
# books.csv and ratings.csv into the current working directory; elsewhere they
# are read from the local checkout under the home directory. Defining
# `path_data` in both branches keeps the loading code below working in either
# environment.
if 'google.colab' in sys.modules:
    path_data = Path('.')
else:
    path_data = Path.home() / 'github/Recommender_System/data/goodbooks_10k'

# Only the first rows of each file are kept (1000 books, 5000 ratings).
books = pd.read_csv(path_data / 'books.csv').head(1000)
ratings = pd.read_csv(path_data / 'ratings.csv').head(5000)

# Sanity-check what was loaded: dimensions and the first two rows of each table.
print(books.shape)
print(ratings.shape)
display(books.head(2))
display(ratings.head(2))
In [6]:
# Quick look at the books table: dimensions, column names, first two rows.
print(books.shape, books.columns, sep="\n")
books.iloc[:2]
Out[6]:
In [7]:
# Narrow the books table down to the identifying and descriptive columns.
books_cols = [
    'book_id',
    'authors',
    'original_publication_year',
    'title',
    'average_rating',
]
books2 = books.loc[:, books_cols]
books2.iloc[:2]
Out[7]:
In [8]:
# Quick look at the ratings table: dimensions, column names, first two rows.
print(ratings.shape, ratings.columns, sep="\n")
ratings.iloc[:2]
Out[8]:
In [9]:
# Join every rating to its book's metadata on `book_id`; the inner join keeps
# only ratings whose book is present in `books`.
df = books.merge(ratings, how="inner", on="book_id")
print(df.shape)
df.iloc[:2]
Out[9]:
Model Based Recommender Engine¶
In [10]:
# Use the user on the first row of the merged table as the worked example.
user_id = df["user_id"].iat[0]
user_id
Out[10]:
In [11]:
# Every merged row that belongs to the sample user.
sample_df = df.loc[df["user_id"] == user_id]
print(sample_df.shape)
sample_df.iloc[:2]
Out[11]:
In [12]:
# Tell surprise's Reader that ratings lie on a scale from 1 to 5.
reader = Reader(rating_scale=(1, 5))
In [13]:
# Build the surprise dataset from the user / item / rating columns, in that order.
data = Dataset.load_from_df(df.loc[:, ['user_id', 'book_id', 'rating']], reader)
In [14]:
# Hold out 25% of the ratings for testing; the fixed seed makes the split repeatable.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)
In [15]:
# Create an SVD model with a fixed seed and fit it on the training split.
svd_model = SVD(random_state=42)
svd_model.fit(trainset)
Out[15]:
In [16]:
# Predict every held-out rating, then report the root-mean-squared error so the
# quality of the fitted model is actually visible (the predictions were
# previously computed but never scored).
predictions = svd_model.test(testset)
accuracy.rmse(predictions)
In [17]:
# Books the sample user has not rated. Filtering on "rows that belong to other
# users" is not enough: a book rated by both the sample user and someone else
# would still slip through, so exclude the sample user's own book ids instead.
didnt_read = (
    df.loc[~df["book_id"].isin(df.loc[df["user_id"] == user_id, "book_id"]), "book_id"]
    .drop_duplicates()
    .tolist()
)
In [18]:
def suggest(df, user_id, sug, model=None, catalog=None):
    """Return the top ``sug`` books a user has not rated, ranked by predicted rating.

    Args:
        df: Ratings table with at least ``user_id`` and ``book_id`` columns.
        user_id: The user to produce suggestions for.
        sug: Maximum number of suggestions to return.
        model: Fitted model exposing ``predict(uid=..., iid=...)`` whose result
            has an ``est`` attribute. Defaults to the notebook-level
            ``svd_model``.
        catalog: Table with ``book_id`` and ``title`` columns used to attach
            titles. Defaults to the notebook-level ``books``.

    Returns:
        DataFrame with columns ``book_id``, ``possible_rate`` and ``title``,
        ordered from highest to lowest predicted rating.
    """
    if model is None:
        model = svd_model
    if catalog is None:
        catalog = books

    # Exclude every book the user has already rated. Merely dropping the user's
    # own rows is not enough, because a book rated by this user AND by someone
    # else would still show up through the other user's row.
    already_rated = df.loc[df["user_id"] == user_id, "book_id"]
    candidates = (
        df.loc[~df["book_id"].isin(already_rated), "book_id"]
        .drop_duplicates()
        .tolist()
    )

    # Predicted rating for each candidate book.
    scored = pd.DataFrame(
        {
            "book_id": candidates,
            "possible_rate": [
                model.predict(uid=user_id, iid=book_id).est for book_id in candidates
            ],
        }
    )
    top = scored.sort_values(by="possible_rate", ascending=False).head(sug)

    # Attach titles; the inner merge keeps the ranking order of `top`.
    return pd.merge(top, catalog[["book_id", "title"]], how="inner", on="book_id")
In [19]:
# Ask the model for 5 book suggestions for the sample user, ranked by predicted rating.
suggest(df, user_id, 5)
Out[19]:
In [ ]:
In [ ]: