Introduction¶
In [1]:
import sys
# Record the interpreter version and executable path used for this run.
print(sys.version, sys.executable, sep="\n")
In [2]:
import pandas as pd
import numpy as np
import scipy
%load_ext watermark
%watermark -iv
Data¶
In [3]:
# Only when running on Google Colab: fetch the two goodbooks-10k CSV files from
# GitHub. `-O` writes them as books.csv / ratings.csv in the current working
# directory, overwriting any existing copies.
if 'google.colab' in sys.modules:
!wget -O books.csv "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv"
!wget -O ratings.csv "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv"
In [4]:
from pathlib import Path

# Locate the goodbooks-10k CSV files. Locally they live in the repository's
# data folder; on Colab the download cell saves them into the current working
# directory, so read them from there instead of leaving `path_data` undefined.
if 'google.colab' in sys.modules:
    path_data = Path('.')
else:
    path_data = Path.home() / 'github/Recommender_System/data/goodbooks_10k'

# Keep only the first rows of each file (1000 books, 5000 ratings).
books = pd.read_csv(path_data / 'books.csv').head(1000)
ratings = pd.read_csv(path_data / 'ratings.csv').head(5000)

print(books.shape)
print(ratings.shape)
display(books.head(2))
display(ratings.head(2))
In [5]:
# Size and column names of the books table, followed by a two-row preview.
print(books.shape, books.columns, sep="\n")
books.head(2)
Out[5]:
In [6]:
# Narrow the books table to the descriptive columns listed below.
books_cols = [
    'book_id',
    'authors',
    'original_publication_year',
    'title',
    'average_rating',
]
books2 = books.loc[:, books_cols]
books2.head(2)
Out[6]:
In [7]:
# Size and column names of the ratings table, followed by a two-row preview.
print(ratings.shape, ratings.columns, sep="\n")
ratings.head(2)
Out[7]:
In [8]:
# Attach book metadata to every rating; the inner join keeps only ratings whose
# book_id is present in the books table.
df = books.merge(ratings, how="inner", on="book_id")
print(df.shape)
df.head(2)
Out[8]:
5. Knowledge-Based Recommender¶
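Concept: A knowledge-based recommender uses explicit knowledge about items and user preferences. Here that knowledge is the user's most frequently rated author and most common publication year, which are applied as simple filtering rules over the book catalogue.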
In [9]:
def knowledge_based_recommender(user_id, ratings_df, books_df, n_recommendations=5,
                                default_author="J.K. Rowling", default_year=2000,
                                year_window=5):
    """Recommend unrated books matching a user's preferred author or era.

    The user's preferences are derived from the books they have rated: the
    most frequent ``authors`` value and the most common
    ``original_publication_year``. A book is a candidate when its ``authors``
    string contains the preferred author string, or when its publication year
    lies within ``year_window`` years of the preferred year.

    Parameters
    ----------
    user_id : scalar
        Value looked up in ``ratings_df['user_id']``.
    ratings_df : pandas.DataFrame
        Must contain ``user_id``, ``book_id`` and ``rating`` columns.
    books_df : pandas.DataFrame
        Must contain ``book_id``, ``authors`` and
        ``original_publication_year`` columns.
    n_recommendations : int, default 5
        Maximum number of rows to return.
    default_author : str, default "J.K. Rowling"
        Author used when no preferred author can be derived (cold start).
    default_year : number, default 2000
        Year used when no preferred year can be derived (cold start).
    year_window : number, default 5
        Maximum absolute distance, in years, from the preferred year.

    Returns
    -------
    pandas.DataFrame
        Candidate rows of ``books_df`` plus a ``rating`` column holding each
        book's mean rating in ``ratings_df``, sorted by that mean (highest
        first). Books without any rating in ``ratings_df`` and books the user
        has already rated are excluded.
    """
    # Ratings made by this user, enriched with the matching book metadata.
    user_ratings = ratings_df[ratings_df['user_id'] == user_id]
    user_books = user_ratings.merge(books_df, on='book_id')

    # Start from the cold-start defaults and override each preference only when
    # the user's history actually provides a value. Both lookups come back
    # empty for a user without (matching) ratings, or when every value is
    # missing, so indexing position 0 blindly would raise.
    top_authors, top_year = default_author, default_year
    author_counts = user_books['authors'].value_counts()
    if not author_counts.empty:
        top_authors = author_counts.index[0]
    year_modes = user_books['original_publication_year'].mode()
    if not year_modes.empty:
        top_year = year_modes.iloc[0]

    # Knowledge rules. The author is matched as a literal substring
    # (regex=False): author strings may contain regex metacharacters such as
    # '.', '(' or '+'. Books with a missing author never match (na=False).
    author_match = books_df['authors'].str.contains(top_authors, regex=False, na=False)
    era_match = (books_df['original_publication_year'] - top_year).abs() <= year_window
    candidates = books_df[author_match | era_match]

    # Attach each candidate's mean rating; the inner join drops candidates that
    # have no rating at all in ratings_df.
    avg_ratings = ratings_df.groupby('book_id')['rating'].mean()
    candidates = candidates.merge(avg_ratings, left_on='book_id', right_index=True)

    # Never recommend something the user has already rated.
    candidates = candidates[~candidates['book_id'].isin(user_ratings['book_id'])]
    return candidates.sort_values('rating', ascending=False).head(n_recommendations)
# Example: recommend books for the user who appears in the first ratings row.
user_id = ratings['user_id'].iat[0]
df_out = knowledge_based_recommender(
    user_id=user_id,
    ratings_df=ratings,
    books_df=books,
)
df_out
Out[9]:
In [ ]: