Introduction¶

import sys
print(sys.version)
print(sys.executable)

3.12.10 | packaged by conda-forge | (main, Apr 10 2025, 22:08:16) [MSC v.1943 64 bit (AMD64)]
C:\Users\Sumedha\.conda\envs\py312\python.exe

import pandas as pd
import numpy as np

Data¶

if 'google.colab' in sys.modules:
    !wget -O books.csv "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv"
    !wget -O ratings.csv "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv"

from pathlib import Path
if 'google.colab' not in sys.modules:
    path_data = Path.home() / 'github/Recommender_System/data/goodbooks_10k'
    books = pd.read_csv(path_data / 'books.csv').head(1000)
    ratings = pd.read_csv(path_data / 'ratings.csv').head(5000)

print(books.shape)
print(ratings.shape)

display(books.head(2))
display(ratings.head(2))

(1000, 23)
(5000, 3)

print(books.shape)
print(books.columns)
books.head(2)

(1000, 23)
Index(['book_id', 'goodreads_book_id', 'best_book_id', 'work_id',
       'books_count', 'isbn', 'isbn13', 'authors', 'original_publication_year',
       'original_title', 'title', 'language_code', 'average_rating',
       'ratings_count', 'work_ratings_count', 'work_text_reviews_count',
       'ratings_1', 'ratings_2', 'ratings_3', 'ratings_4', 'ratings_5',
       'image_url', 'small_image_url'],
      dtype='object')

# Narrow the catalogue to a handful of descriptive columns for a compact
# preview. `books2` is rebuilt from `books` again in the Method 03 section.
books_cols = ['book_id', 'authors', 'original_publication_year', 'title', 'average_rating']
books2 = books[books_cols]
books2.head(2)

print(ratings.shape)
print(ratings.columns)
ratings.head(2)

(5000, 3)
Index(['user_id', 'book_id', 'rating'], dtype='object')

# Attach every rating to the metadata of the book it refers to. The inner
# join keeps only book_ids that occur in both frames.
df = books.merge(ratings, how="inner", on="book_id")
print(df.shape)
df.head(2)

(3031, 25)

Method 01: Collaborative Filtering: Item (book) based¶

Item-based recommendation is a type of recommendation system that suggests items to users based on the similarities between items themselves, rather than on the preferences or behaviors of other users. It is a commonly used technique in the field of recommender systems, which are used in various online platforms to suggest products, services, or content to users based on their preferences or behaviors.

References:

https://www.kaggle.com/code/huseyinbaytar/book-recommendation-systems

# Build a boolean user x title matrix: True where the user has a rating for
# that title, False where the (user, title) pair has none.
mean_rating = df.groupby(["user_id", "title"])["rating"].mean()
user_book_df = mean_rating.unstack().notnull()
user_book_df.head(2)

df.head(2).T

# Use the first title column of the user x title matrix as the example book.
book_title = user_book_df.columns[0]
book_title

"'Salem's Lot"

# Boolean column of the example book: one entry per user, True if they rated it.
sample_df = user_book_df[book_title]
print(sample_df.shape)
sample_df.head(2)

(132,)

user_id
1    False
2    False
Name: 'Salem's Lot, dtype: bool

# Correlate every title's rated/not-rated column with the example book's
# column and show the five most strongly correlated titles. The example book
# itself is part of the matrix, so it is included in the ranking.
user_book_df.corrwith(sample_df).sort_values(ascending=False).head(5)

title
'Salem's Lot                                        1.000000
Night Shift                                         1.000000
Different Seasons                                   0.813350
The Green Mile                                      0.813350
The Color of Magic (Discworld, #1; Rincewind #1)    0.704403
dtype: float64

Method 02: Collaborative Filtering: Item (Book) Based using cosine similarity¶

from sklearn.metrics.pairwise import cosine_similarity

def item_based_cf(user_id, ratings_df, books_df, n_recommendations=5):
    """Recommend books for one user with item-based collaborative filtering.

    Every book is scored by summing, over the books the user has rated,
    ``cosine similarity between the two books' rating columns`` times
    ``the user's rating``. Books the user has already rated are excluded.

    Args:
        user_id: Value to look up in ``ratings_df['user_id']``.
        ratings_df: DataFrame with ``user_id``, ``book_id`` and ``rating``
            columns.
        books_df: DataFrame with a ``book_id`` column; the returned rows are
            taken from this frame.
        n_recommendations: Maximum number of books to return.

    Returns:
        Rows of ``books_df`` (original index and columns preserved) for the
        highest-scoring books, best match first. The frame is empty when the
        user has no ratings in ``ratings_df``.
    """
    # The user's ratings, one value per book. Duplicate (user, book) rows are
    # averaged, matching pivot_table's default aggregation below.
    rated = (
        ratings_df.loc[ratings_df['user_id'] == user_id]
        .groupby('book_id')['rating']
        .mean()
        .dropna()
    )
    if rated.empty:
        # Nothing to base a recommendation on.
        return books_df.iloc[0:0]

    # Create user-item matrix (missing ratings become 0) and view it per item
    user_item_matrix = ratings_df.pivot_table(
        index='user_id', columns='book_id', values='rating'
    ).fillna(0)
    item_vectors = user_item_matrix.T

    # Only similarities towards the books this user rated contribute to the
    # score, so compute just those columns instead of the full item-item matrix.
    similarity_to_rated = cosine_similarity(item_vectors, item_vectors.loc[rated.index])

    # Weighted score per book: sum(similarity * user's rating)
    scores = pd.Series(
        similarity_to_rated @ rated.to_numpy(), index=item_vectors.index
    )

    # Remove already rated items
    candidates = scores.drop(rated.index)

    # Keep only books that can actually be returned. Filtering AFTER taking
    # the top n (as before) silently shrank the result whenever a top-ranked
    # book_id was missing from books_df.
    candidates = candidates[candidates.index.isin(books_df['book_id'])]

    # Get top recommendations
    top_book_ids = (
        candidates.sort_values(ascending=False, kind='stable')
        .head(n_recommendations)
        .index
    )

    # Return the rows in ranking order rather than in books_df order
    rank = {book_id: position for position, book_id in enumerate(top_book_ids)}
    recommended = books_df[books_df['book_id'].isin(top_book_ids)]
    return recommended.sort_values(
        'book_id', key=lambda ids: ids.map(rank), kind='stable'
    )

# Usage: recommend for the first user that appears in the ratings sample.
# Pass the looked-up user_id instead of a hard-coded 1 so the call and the
# Results cells below always refer to the same user.
user_id = ratings['user_id'].iloc[0]
df_out = item_based_cf(user_id, ratings, books)
df_out

Results¶

print(user_id)

1

ratings[ratings['user_id'] == user_id]

df_out['image_url'].values

array(['https://images.gr-assets.com/books/1406383769m/49628.jpg',
       'https://images.gr-assets.com/books/1327128714m/28921.jpg'],
      dtype=object)

from IPython.display import Image, HTML

def path_to_image_html(path):
    """Return an HTML ``<img>`` tag whose ``src`` attribute is *path*.

    Intended as a ``DataFrame.to_html`` formatter for a column of image URLs.

    Args:
        path: Image URL or file path as a string.

    Returns:
        The markup ``<img src="..."/>`` with *path* escaped for use inside a
        double-quoted attribute.
    """
    import html  # local import keeps this cell self-contained

    # The previous version emitted a stray second quote (src="url""/>), which
    # is malformed markup. Escaping also keeps '&' and '"' in URLs valid.
    return f'<img src="{html.escape(path, quote=True)}"/>'

# Render the recommendations as an HTML table with cover thumbnails.
# escape=False lets the <img> markup from the formatter through unescaped.
cols = ['image_url', 'title', 'authors', 'original_publication_year', 'average_rating']
table_html = df_out[cols].to_html(
    escape=False,
    formatters={'image_url': path_to_image_html},
    justify='center',
)
HTML(table_html)

Method 03: Content-Based Filtering: Item (Book) Based using multiple columns¶

# Start from the full catalogue and replace missing values with empty strings
# so the text features below can be cleaned and concatenated.
books2 = books.fillna('')

def clean_data(x):
    """Return the string *x* with all space characters removed, lower-cased.

    Only the plain space character is stripped; tabs and other whitespace
    are left in place.
    """
    without_spaces = "".join(x.split(" "))
    return without_spaces.lower()

# Metadata columns that describe a book for the similarity computation;
# every other column is dropped from books2.
features = ['original_title','authors','average_rating']
books2=books2[features]

# Cast every feature (including the numeric average_rating) to str so that
# clean_data and create_soup can treat all of them as text.
books2 = books2.astype(str)
books2.dtypes

original_title    object
authors           object
average_rating    object
dtype: object

# Normalise each feature column in turn: cast to text, then strip spaces and
# lower-case every value via clean_data.
for feature in features:
    as_text = books2[feature].astype(str)
    books2[feature] = as_text.map(clean_data)

books2.head(2)

def create_soup(x):
    """Combine a row's title, authors and rating into one space-separated string.

    *x* must support lookup by the keys ``original_title``, ``authors`` and
    ``average_rating``, each holding a string.
    """
    parts = (x['original_title'], x['authors'], x['average_rating'])
    return ' '.join(parts)

# Build the per-book text (title + authors + rating) that gets vectorised below.
books2['soup'] = books2.apply(create_soup, axis=1)

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Turn each book's soup string into a row of token counts, ignoring English
# stop words.
# NOTE(review): with the vectorizer's default tokenisation, punctuation
# presumably splits tokens, so a rating such as "4.34" may be indexed only by
# its digits after the dot - confirm. That would explain why many
# recommendations below share the same average_rating.
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(books2['soup'])

# Cosine similarity between every pair of books' count vectors; row i holds
# the similarity of book i to every book (itself included).
cosine_sim = cosine_similarity(count_matrix,count_matrix)

print(type(cosine_sim))
print(cosine_sim.shape)

<class 'numpy.ndarray'>
(1000, 1000)

# Lookup from cleaned title to the book's row position in books2, which is
# also its row in cosine_sim (both follow the row order of books2).
# NOTE(review): cleaned titles are not guaranteed to be unique (missing
# titles were filled with ''), in which case one label maps to several
# positions - verify how callers handle that.
books2 = books2.reset_index()
indices= pd.Series(books2.index, index=books2['original_title'])
indices.head(2)

original_title
thehungergames                         0
harrypotterandthephilosopher'sstone    1
dtype: int64

def get_recommendations_new(title, cosine_sim=cosine_sim, top_n=10):
    """Return the original titles of the books most similar to *title*.

    Relies on the module-level ``indices`` (cleaned title -> row position)
    and ``books`` (catalogue whose rows line up with ``cosine_sim``).

    Args:
        title: Book title as it appears in ``books['original_title']``; it is
            normalised (spaces removed, lower-cased) before the lookup.
        cosine_sim: Square similarity matrix indexed by row position.
        top_n: Maximum number of titles to return.

    Returns:
        List of ``original_title`` values, most similar first. The query
        book itself is never included.

    Raises:
        KeyError: If no book matches the normalised title.
    """
    key = title.replace(' ', '').lower()

    # Several books can share one cleaned title; a plain label lookup would
    # then return multiple positions and break the ranking below, so resolve
    # to the first match explicitly.
    positions = indices[indices.index == key]
    if positions.empty:
        raise KeyError(f"No book found for title {title!r}")
    idx = int(positions.iloc[0])

    # Get the pairwise similarity scores of all other books with that book.
    # The query row is excluded by position: books with an identical soup
    # also score 1.0, so the query is not guaranteed to sort first.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Sort the books based on the similarity scores (stable for ties)
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Get the row positions of the top_n most similar books
    book_indices = [position for position, _ in sim_scores[:top_n]]

    # Return the titles of the most similar books
    return list(books['original_title'].iloc[book_indices])

books2['original_title'].head(2)

0                         thehungergames
1    harrypotterandthephilosopher'sstone
Name: original_title, dtype: object

books['original_title'].head(2)

0                            The Hunger Games
1    Harry Potter and the Philosopher's Stone
Name: original_title, dtype: object

# Usage: query with the first book of the catalogue, passing its original
# (un-normalised) title; the function normalises it before the lookup.
book_title = books['original_title'].iloc[0]
lst = get_recommendations_new(book_title, cosine_sim)
print('Given Book: ', book_title)
print('Recommended Books: ')
print(lst)

Given Book:  The Hunger Games
Recommended Books: 
['Catching Fire', ' The Fellowship of the Ring', 'Mockingjay', 'A Thousand Splendid Suns', 'City of Glass', 'A Light in the Attic', 'The Hunger Games Box Set', 'Hopeless', 'Different Seasons', 'A Fine Balance']

# Render the recommended books as an HTML table with cover thumbnails.
cols = ['image_url', 'title', 'authors', 'original_publication_year', 'average_rating']

# `lst` is ordered by similarity, but an isin() filter returns rows in
# catalogue order; re-sort by each title's position in `lst` so the table
# shows the best match first.
rank = {title: position for position, title in enumerate(lst)}
df_out = books[books['original_title'].isin(lst)]
df_out = df_out.sort_values(
    'original_title', key=lambda titles: titles.map(rank), kind='stable'
)

# escape=False lets the <img> markup from the formatter through unescaped.
HTML(df_out[cols].to_html(escape=False, formatters=dict(image_url=path_to_image_html), justify='center'))

	book_id	goodreads_book_id	best_book_id	work_id	books_count	isbn	isbn13	authors	original_publication_year	original_title	...	ratings_count	work_ratings_count	work_text_reviews_count	ratings_1	ratings_2	ratings_3	ratings_4	ratings_5	image_url	small_image_url
0	1	2767052	2767052	2792775	272	439023483	9.780439e+12	Suzanne Collins	2008.0	The Hunger Games	...	4780653	4942365	155254	66715	127936	560092	1481305	2706317	https://images.gr-assets.com/books/1447303603m...	https://images.gr-assets.com/books/1447303603s...
1	2	3	3	4640799	491	439554934	9.780440e+12	J.K. Rowling, Mary GrandPré	1997.0	Harry Potter and the Philosopher's Stone	...	4602479	4800065	75867	75504	101676	455024	1156318	3011543	https://images.gr-assets.com/books/1474154022m...	https://images.gr-assets.com/books/1474154022s...

	book_id	goodreads_book_id	best_book_id	work_id	books_count	isbn	isbn13	authors	original_publication_year	original_title	...	ratings_count	work_ratings_count	work_text_reviews_count	ratings_1	ratings_2	ratings_3	ratings_4	ratings_5	image_url	small_image_url
0	1	2767052	2767052	2792775	272	439023483	9.780439e+12	Suzanne Collins	2008.0	The Hunger Games	...	4780653	4942365	155254	66715	127936	560092	1481305	2706317	https://images.gr-assets.com/books/1447303603m...	https://images.gr-assets.com/books/1447303603s...
1	2	3	3	4640799	491	439554934	9.780440e+12	J.K. Rowling, Mary GrandPré	1997.0	Harry Potter and the Philosopher's Stone	...	4602479	4800065	75867	75504	101676	455024	1156318	3011543	https://images.gr-assets.com/books/1474154022m...	https://images.gr-assets.com/books/1474154022s...

	book_id	authors	original_publication_year	title	average_rating
0	1	Suzanne Collins	2008.0	The Hunger Games (The Hunger Games, #1)	4.34
1	2	J.K. Rowling, Mary GrandPré	1997.0	Harry Potter and the Sorcerer's Stone (Harry P...	4.44

	book_id	goodreads_book_id	best_book_id	work_id	books_count	isbn	isbn13	authors	original_publication_year	original_title	...	work_text_reviews_count	ratings_1	ratings_2	ratings_3	ratings_4	ratings_5	image_url	small_image_url	user_id	rating
0	2	3	3	4640799	491	439554934	9.780440e+12	J.K. Rowling, Mary GrandPré	1997.0	Harry Potter and the Philosopher's Stone	...	75867	75504	101676	455024	1156318	3011543	https://images.gr-assets.com/books/1474154022m...	https://images.gr-assets.com/books/1474154022s...	4	5
1	2	3	3	4640799	491	439554934	9.780440e+12	J.K. Rowling, Mary GrandPré	1997.0	Harry Potter and the Philosopher's Stone	...	75867	75504	101676	455024	1156318	3011543	https://images.gr-assets.com/books/1474154022m...	https://images.gr-assets.com/books/1474154022s...	15	4

	0	1
book_id	2	2
goodreads_book_id	3	3
best_book_id	3	3
work_id	4640799	4640799
books_count	491	491
isbn	439554934	439554934
isbn13	9780439554930.0	9780439554930.0
authors	J.K. Rowling, Mary GrandPré	J.K. Rowling, Mary GrandPré
original_publication_year	1997.0	1997.0
original_title	Harry Potter and the Philosopher's Stone	Harry Potter and the Philosopher's Stone
title	Harry Potter and the Sorcerer's Stone (Harry P...	Harry Potter and the Sorcerer's Stone (Harry P...
language_code	eng	eng
average_rating	4.44	4.44
ratings_count	4602479	4602479
work_ratings_count	4800065	4800065
work_text_reviews_count	75867	75867
ratings_1	75504	75504
ratings_2	101676	101676
ratings_3	455024	455024
ratings_4	1156318	1156318
ratings_5	3011543	3011543
image_url	https://images.gr-assets.com/books/1474154022m...	https://images.gr-assets.com/books/1474154022m...
small_image_url	https://images.gr-assets.com/books/1474154022s...	https://images.gr-assets.com/books/1474154022s...
user_id	4	15
rating	5	4

title	'Salem's Lot	1776	1984	2001: A Space Odyssey (Space Odyssey, #1)	A Beautiful Mind	A Brief History of Time	A Child Called "It" (Dave Pelzer #1)	A Christmas Carol	A Clash of Kings (A Song of Ice and Fire, #2)	A Clockwork Orange	...	White Fang	White Oleander	Who Moved My Cheese?	Wicked: The Life and Times of the Wicked Witch of the West (The Wicked Years, #1)	Wizard and Glass (The Dark Tower, #4)	Wolves of the Calla (The Dark Tower, #5)	World War Z: An Oral History of the Zombie War	Wuthering Heights	Xenocide (Ender's Saga, #3)	Zen and the Art of Motorcycle Maintenance: An Inquiry Into Values
user_id
1	False	False	False	False	False	False	False	False	False	False	...	False	False	False	False	False	False	False	False	False	False
2	False	False	False	False	False	False	False	False	False	False	...	False	False	True	False	False	False	False	False	False	False

	book_id	goodreads_book_id	best_book_id	work_id	books_count	isbn	isbn13	authors	original_publication_year	original_title	...	ratings_count	work_ratings_count	work_text_reviews_count	ratings_1	ratings_2	ratings_3	ratings_4	ratings_5	image_url	small_image_url
521	522	49628	49628	1871423	113	375507256	9.780376e+12	David Mitchell	2004.0	Cloud Atlas	...	152441	172239	17001	5442	10645	28665	58983	68504	https://images.gr-assets.com/books/1406383769m...	https://images.gr-assets.com/books/1406383769s...
819	820	28921	28921	3333111	170	571225381	9.780571e+12	Kazuo Ishiguro	1989.0	The Remains of the Day	...	105892	121763	7455	1500	5028	21065	46702	47468	https://images.gr-assets.com/books/1327128714m...	https://images.gr-assets.com/books/1327128714s...

	original_title	authors	average_rating
0	thehungergames	suzannecollins	4.34
1	harrypotterandthephilosopher'sstone	j.k.rowling,marygrandpré	4.44

	title	authors	original_publication_year	average_rating
16	Catching Fire (The Hunger Games, #2)	Suzanne Collins	2009.0	4.30
18	The Fellowship of the Ring (The Lord of the Rings, #1)	J.R.R. Tolkien	1954.0	4.34
19	Mockingjay (The Hunger Games, #3)	Suzanne Collins	2010.0	4.03
66	A Thousand Splendid Suns	Khaled Hosseini	2007.0	4.34
133	City of Glass (The Mortal Instruments, #3)	Cassandra Clare	2009.0	4.34
277	A Light in the Attic	Shel Silverstein	1981.0	4.34
506	The Hunger Games Trilogy Boxset (The Hunger Games, #1-3)	Suzanne Collins	2010.0	4.49
538	Hopeless (Hopeless, #1)	Colleen Hoover	2012.0	4.34
738	Different Seasons	Stephen King	1982.0	4.34
778	A Fine Balance	Rohinton Mistry	1996.0	4.34

	user_id	book_id	rating
0	1	258	5
75	1	268	3
76	1	5556	3
77	1	3638	3
78	1	1796	5
79	1	867	3
80	1	47	3
81	1	2738	3