Collaborative Anime Filtering¶
This section implements a Collaborative Filtering approach to recommend anime to users based on historical rating patterns. It uses matrix factorization algorithms (SVD, NMF) and a neighborhood-based algorithm (KNNBasic) from the Surprise library. The goal is to predict user preferences by leveraging similarities between users or items.
- Reinstall Specific Version of NumPy (v1.26.4)
!pip install numpy==1.26.4 --force-reinstall --no-cache-dir
Collecting numpy==1.26.4
Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/61.0 kB ? eta -:--:--
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 61.0/61.0 kB 5.1 MB/s eta 0:00:00
Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 308.6 MB/s eta 0:00:00
Installing collected packages: numpy
Attempting uninstall: numpy
Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
Successfully uninstalled numpy-2.0.2
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.
Successfully installed numpy-1.26.4
- Install Surprise Library for Recommender Systems
!pip install surprise
Collecting surprise
Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/154.4 kB ? eta -:--:--
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 154.4/154.4 kB 4.4 MB/s eta 0:00:00
Installing build dependencies ... done
Getting requirements to build wheel ... done
Preparing metadata (pyproject.toml) ... done
Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-surprise->surprise) (1.5.1)
Requirement already satisfied: numpy>=1.19.5 in /usr/local/lib/python3.11/dist-packages (from scikit-surprise->surprise) (1.26.4)
Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-surprise->surprise) (1.15.3)
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
Building wheel for scikit-surprise (pyproject.toml) ... done
Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2469542 sha256=4114580d77c8540cc96ff496fe51e7985d64fac58266fb5b78b954b50d8a384d
Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.4 surprise-0.1
Library Imports and Environment Setup¶
# Imports
import pandas as pd
import numpy as np
import warnings
import pickle
import os
import matplotlib.pyplot as plt
from surprise import (
Reader, Dataset, SVD, SVDpp, KNNBasic, KNNWithZScore,
BaselineOnly, NMF, accuracy
)
from surprise.model_selection import cross_validate, GridSearchCV
from sklearn.model_selection import train_test_split
# Disable warnings
warnings.filterwarnings('ignore')
Data Exploration and Preprocessing¶
- Loading the user-anime rating dataset from a CSV file and removing duplicate entries. The file contains historical rating scores, which will be used to train the collaborative filtering model.
ratings_df = pd.read_csv('/content/drive/MyDrive/Anime Recommender System/users-score-2023.csv')
ratings_df = ratings_df.drop_duplicates(subset=['user_id', 'anime_id'])
Prepare Data for Surprise Library:
📝 Summary:
Preparing the cleaned ratings data for modeling with the Surprise library:
- A Reader object is created to define the rating scale dynamically, based on the actual min and max values in the dataset.
- The ratings_df is transformed into a Dataset that Surprise can work with by selecting only the necessary columns: user_id, anime_id, and rating.
This setup enables algorithms like SVD, KNN, and NMF to train on structured user-item interaction data.
reader = Reader(rating_scale=(ratings_df['rating'].min(), ratings_df['rating'].max()))
train_data = Dataset.load_from_df(ratings_df[['user_id', 'anime_id', 'rating']], reader)
Build Full Training Set for Model
📝 Summary:
Converting the Surprise Dataset object into a Trainset object using .build_full_trainset(). The trainset is a matrix-style internal representation used by all Surprise algorithms for training. It includes all available ratings without splitting, meaning the model will train on 100% of the data.
trainset = train_data.build_full_trainset()
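As a quick sanity check (not part of the original workflow), the Trainset object exposes a few summary attributes that confirm what was loaded; the snippet below is a minimal sketch assuming trainset was built as above.
# Illustrative only: inspect the full trainset built above
print(f"Users:   {trainset.n_users}")    # number of distinct users
print(f"Items:   {trainset.n_items}")    # number of distinct anime
print(f"Ratings: {trainset.n_ratings}")  # total number of ratings kept
print(f"Global mean rating: {trainset.global_mean:.3f}")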
- Train SVD Model with Cross-Validation
📝 Summary:
Initializing and evaluating an SVD (Singular Value Decomposition) model using 5-fold cross-validation on the dataset.
- SVD() is a matrix factorization algorithm that learns latent features for users and items.
- cross_validate() splits the data into 5 folds and computes two evaluation metrics: RMSE (Root Mean Squared Error) and MAE (Mean Absolute Error).
- Results (stored in cv_results) include average performance and training/testing time per fold.
This is a standard way to assess how well the model generalizes before deploying.
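For background, Surprise's SVD predicts a rating as a biased dot product of latent factors (this is the standard formulation from the Surprise documentation, shown here only as a reminder):

$$\hat{r}_{ui} = \mu + b_u + b_i + q_i^\top p_u$$

where $\mu$ is the global mean rating, $b_u$ and $b_i$ are the user and item biases, and $p_u$, $q_i$ are the learned latent factor vectors. The parameters are fit by minimizing the regularized squared error on the observed ratings via stochastic gradient descent.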
model = SVD()
cv_results = cross_validate(model, train_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).
Fold 1 Fold 2 Fold 3 Fold 4 Fold 5 Mean Std
RMSE (testset) 1.2537 1.2527 1.2534 1.2538 1.2548 1.2537 0.0007
MAE (testset) 0.9261 0.9257 0.9256 0.9262 0.9264 0.9260 0.0003
Fit time 286.62 290.84 323.92 288.92 290.76 296.21 13.94
Test time 110.29 88.17 83.31 87.45 76.39 89.12 11.38
Conclusion:
The cross-validation results indicate that the SVD model performs consistently across all 5 folds with minimal variation in error metrics:
- The RMSE scores are tightly clustered around 1.2537, and the MAE values center near 0.9260, suggesting stable predictive performance across folds.
- The fit times vary slightly more (especially in Fold 3), but remain within a reasonable range for training on the full dataset.
- The test times decrease gradually, which may reflect caching effects or system load differences, but do not significantly impact evaluation consistency.
These results demonstrate that SVD is a reliable choice for Collaborative Filtering in this anime recommendation context, balancing accuracy and computational cost effectively.
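The per-fold numbers quoted above can also be pulled programmatically from the dictionary returned by cross_validate(); the short sketch below assumes cv_results from the cell above is still in scope.
# Illustrative only: summarize the cross-validation results dictionary
print("Mean RMSE:", np.mean(cv_results['test_rmse']))
print("Mean MAE: ", np.mean(cv_results['test_mae']))
print("Mean fit time (s): ", np.mean(cv_results['fit_time']))
print("Mean test time (s):", np.mean(cv_results['test_time']))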
Train NMF Model with Cross-Validation¶
Initializing and evaluating the NMF (Non-negative Matrix Factorization) model using 5-fold cross-validation.
- NMF() is a latent factor model similar to SVD, but it enforces non-negativity on the factorized matrices, which can make results more interpretable in some contexts (e.g., ratings can't be negative).
- cross_validate() computes RMSE and MAE scores for each fold to assess accuracy and generalizability.
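As background (not from the original notebook), with its default settings Surprise's NMF predicts a rating as an unbiased dot product of non-negative factors:

$$\hat{r}_{ui} = q_i^\top p_u, \qquad p_u \ge 0,\; q_i \ge 0$$

where the user factors $p_u$ and item factors $q_i$ are kept non-negative during optimization. Unlike SVD above, this default formulation has no global mean or bias terms, which helps explain the higher errors reported below.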
model3 = NMF()
cv_results3 = cross_validate(model3, train_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
Evaluating RMSE, MAE of algorithm NMF on 5 split(s).
Fold 1 Fold 2 Fold 3 Fold 4 Fold 5 Mean Std
RMSE (testset) 2.1817 2.1768 2.1722 2.1773 2.1686 2.1753 0.0045
MAE (testset) 1.9119 1.9062 1.9016 1.9070 1.8979 1.9049 0.0048
Fit time 444.63 451.36 452.43 451.94 453.82 450.84 3.21
Test time 70.93 64.93 71.47 64.80 71.32 68.69 3.13
Conclusion:
The NMF (Non-negative Matrix Factorization) model yielded consistent but relatively higher error rates compared to SVD:
- The average RMSE across folds was 2.1753, with a low standard deviation of ±0.0045, indicating stable error margins but noticeably higher prediction error than SVD (1.2537).
- The MAE was similarly stable (1.9049 ± 0.0048) but again higher than the SVD counterpart (0.9260), suggesting that the model struggles to predict precise ratings.
- Training times were considerably longer (~450 seconds on average), which reflects the heavier computational cost of the NMF algorithm.
- Test times remained reasonable and consistent (mean: 68.69 seconds), indicating predictable evaluation performance.
Train Item-Based KNN Model with Cross-Validation¶
Building and evaluating an item-based Collaborative Filtering model using the KNNBasic algorithm from the Surprise library.
- Similarity metric: Cosine similarity is used to compute the closeness between items.
- user_based=False specifies that recommendations should be made based on item-item similarity instead of user-user.
- The model is evaluated using 5-fold cross-validation, tracking both RMSE and MAE to assess performance.
sim_options = {
'name': 'cosine', # Cosine similarity (can also use 'pearson')
'user_based': False # False → Item-based
}
model2 = KNNBasic(sim_options=sim_options)
cv_results2 = cross_validate(model2, train_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).
Fold 1 Fold 2 Fold 3 Fold 4 Fold 5 Mean Std
RMSE (testset) 1.3807 1.3808 1.3807 1.3790 1.3793 1.3801 0.0008
MAE (testset) 1.0370 1.0372 1.0374 1.0354 1.0364 1.0367 0.0007
Fit time 266.49 282.66 291.02 287.58 299.50 285.45 10.96
Test time 819.10 827.54 816.43 917.87 896.85 855.56 42.97
Conclusion:
The item-based KNNBasic model using Cosine Similarity showed moderate predictive performance and high computational cost during testing:
- The average RMSE across the 5 folds was 1.3801 ± 0.0008, which is better than NMF (2.1753) but worse than SVD (1.2537).
- The MAE was 1.0367 ± 0.0007, again landing between SVD (0.9260) and NMF (1.9049).
- Training times were reasonable at around 285 seconds, similar to SVD and faster than NMF.
- However, test times were very high, averaging ~855 seconds per fold, with significant variability (±42.97 s), likely due to the cost of computing the similarity matrix and predictions for all item pairs.
While the model is stable and interpretable, the high inference cost and only moderate accuracy make KNNBasic less favorable than SVD in this context.
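One practical upside of the item-based model is that its similarity matrix can be queried directly for "similar anime" lists. The sketch below is illustrative only: it refits KNNBasic on the full trainset (the cross-validated model2 above was never fit on it), and some_anime_id is a hypothetical raw anime_id that must exist in the data.
# Illustrative only: query item neighbors from a KNN model fit on the full trainset
knn_full = KNNBasic(sim_options=sim_options)
knn_full.fit(trainset)

some_anime_id = 1  # hypothetical raw anime_id; replace with a real one
inner_id = trainset.to_inner_iid(some_anime_id)            # map raw id -> inner id
neighbor_inner_ids = knn_full.get_neighbors(inner_id, k=10)
neighbor_anime_ids = [trainset.to_raw_iid(i) for i in neighbor_inner_ids]
print("10 most similar anime ids:", neighbor_anime_ids)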
Model Evaluation Comparison: RMSE & MAE of SVD, NMF, and KNNBasic¶
# Model names
models = ['SVD', 'NMF', 'KNNBasic']
# Mean and Std for RMSE and MAE
rmse_means = [1.2537, 2.1753, 1.3801]
rmse_stds = [0.0007, 0.0045, 0.0008]
mae_means = [0.9260, 1.9049, 1.0367]
mae_stds = [0.0003, 0.0048, 0.0007]
# X-axis positions
x = np.arange(len(models))
width = 0.35 # width of the bars
# Create the plot
fig, ax = plt.subplots(figsize=(6, 6))
# Plot RMSE and MAE bars with error bars
rmse_bars = ax.bar(x - width/2, rmse_means, width, yerr=rmse_stds, label='RMSE', capsize=5)
mae_bars = ax.bar(x + width/2, mae_means, width, yerr=mae_stds, label='MAE', capsize=5)
# Set labels and title
ax.set_ylabel('Error Score')
ax.set_xlabel('Recommendation Algorithm')
ax.set_title('Model Evaluation Comparison: RMSE & MAE of SVD, NMF, and KNNBasic')
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.legend()
# Annotate bars with values
for bars in [rmse_bars, mae_bars]:
    for bar in bars:
        height = bar.get_height()
        ax.annotate(f'{height:.3f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 5),
                    textcoords="offset points",
                    ha='center', va='bottom')
# Final layout without grid
plt.tight_layout()
plt.show()
As we see:
- SVD has the lowest RMSE and MAE, meaning it performed best.
- NMF has the highest errors, making it the least accurate.
- KNNBasic performs better than NMF but worse than SVD.
Save the best model¶
# Define full save path
save_path = '/content/drive/MyDrive/Anime Recommender System/svd_best_model13.pkle'
# Save the trained SVD model
with open(save_path, 'wb') as f:
    pickle.dump(model, f)
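To confirm the pickled model is usable later, it can be reloaded and asked for a single prediction. This is a minimal sketch; the user_id and anime_id values are hypothetical placeholders.
# Illustrative only: reload the saved model and score one (user, anime) pair
with open(save_path, 'rb') as f:
    loaded_model = pickle.load(f)

pred = loaded_model.predict(uid=123, iid=5114)  # hypothetical user_id / anime_id
print(f"Predicted rating for user 123 on anime 5114: {pred.est:.2f}")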
Define Hyperparameter Grid for SVD (Best Model) Tuning¶
Defining a hyperparameter grid for fine-tuning the SVD model using grid search. Each combination of parameters will be tested during cross-validation to find the optimal configuration.
The parameters include:
- n_factors: Number of latent factors (controls model complexity).
- n_epochs: Number of training iterations.
- lr_all: Learning rate for all model parameters.
- reg_all: Regularization term to prevent overfitting.
These values will be passed to GridSearchCV to identify the best model configuration based on performance metrics (RMSE or MAE).
# Define the parameter grid
param_grid = {
'n_factors': [50, 100, 150],
'n_epochs': [10, 20, 30],
'lr_all': [0.001, 0.005, 0.01],
'reg_all': [0.01, 0.02, 0.05]
}
This step performs an exhaustive Grid Search using GridSearchCV to find the optimal hyperparameters for the SVD model, based on RMSE and MAE performance with 5-fold cross-validation.
- The grid defined earlier is passed to GridSearchCV with SVD as the algorithm.
- The search evaluates all parameter combinations across 5 folds.
- After training, the best parameters for minimizing RMSE are retrieved and printed.
This step ensures the SVD model is finely tuned for optimal prediction accuracy on this dataset.
# Set up GridSearchCV with 5-fold cross-validation
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=5)
# Fit the grid search on the training data
gs.fit(train_data)
# Retrieve the best parameters for minimizing RMSE
best_params = gs.best_params['rmse']
print("Best parameters for RMSE:", best_params)
print("Best RMSE score:", gs.best_score['rmse'])
best_params = gs.best_params['rmse']
final_model = SVD(**best_params)
final_model.fit(train_data.build_full_trainset())
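With the tuned final_model fit on the full trainset, top-N recommendations for a single user can be generated by scoring the anime that user has not rated yet. The sketch below is illustrative only; target_user is a hypothetical user_id taken from ratings_df.
# Illustrative only: top-10 recommendations for one user with the tuned model
target_user = 123  # hypothetical user_id
seen = set(ratings_df.loc[ratings_df['user_id'] == target_user, 'anime_id'])
candidates = [aid for aid in ratings_df['anime_id'].unique() if aid not in seen]

# Score every unseen anime and keep the 10 highest predicted ratings
scored = [(aid, final_model.predict(target_user, aid).est) for aid in candidates]
top_10 = sorted(scored, key=lambda x: x[1], reverse=True)[:10]
print("Top-10 recommended (anime_id, predicted rating):", top_10)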