!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/154.4 kB ? eta -:--:--
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 154.4/154.4 kB 4.6 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-surprise) (1.4.2)
Requirement already satisfied: numpy>=1.19.5 in /usr/local/lib/python3.11/dist-packages (from scikit-surprise) (1.26.4)
Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-surprise) (1.14.1)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... done
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505173 sha256=1d0b27e2f9ee6583dd311c8a311077197bc61bcd58155d1b2c163fb2cc7c7b7d
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4

import pandas as pd
import numpy as np
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import save_npz, csr_matrix,load_npz
from surprise import Reader, Dataset, SVD, PredictionImpossible
import datetime as dt
import pickle
import os
from IPython.display import Image, HTML

# Load the merged movie metadata table from the mounted Drive folder.
merged_df = pd.read_csv('/content/drive/MyDrive/Recommendation System/MovieLens Metadata Datasets/new_merged_df.csv')

# Notebook inspection: preview the first rows.
merged_df.head()

# Notebook inspection: list the column labels.
merged_df.columns

Index(['adult', 'backdrop_path', 'belongs_to_collection', 'budget', 'genres',
       'homepage', 'id', 'imdb_id', 'original_language', 'original_title',
       'overview', 'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count', 'cast', 'crew', 'keywords'],
      dtype='object')

# Notebook inspection: (rows, columns) of the loaded table.
merged_df.shape

(77638, 28)

def extract_director(crew_list):
  """Return the name of the first crew member whose job is 'Director'.

  Args:
    crew_list: List of crew dictionaries carrying 'job' and 'name' keys.

  Returns:
    The director's name, or ``np.nan`` when the input is not a list or no
    usable director entry is present.
  """
  # Missing or unparsed cells (NaN, None, raw strings) carry no crew entries.
  if not isinstance(crew_list, list):
    return np.nan

  for crew_member in crew_list:
    # Skip malformed entries instead of raising KeyError/TypeError.
    if not isinstance(crew_member, dict):
      continue
    if crew_member.get('job') == 'Director' and 'name' in crew_member:
      return crew_member['name']

  return np.nan

def extract_top_n_items(data_list, n):
  """Collect the 'name' field of each entry, keeping at most the first n.

  Any input that is not a list yields an empty list.
  """
  if not isinstance(data_list, list):
    return []

  names = []
  for record in data_list:
    names.append(record['name'])

  # Only truncate when there are more names than requested.
  if len(names) <= n:
    return names
  return names[:n]

# Work on a copy so merged_df keeps the raw, unparsed columns.
new_df = merged_df.copy()

# These columns are parsed with literal_eval; missing cells are replaced by "[]"
# first so they parse to an empty list.
features = ['cast', 'crew', 'keywords', 'genres']
for feature in features:
    new_df[feature] = new_df[feature].fillna("[]").apply(literal_eval)

# Derive the director from the parsed crew list (NaN when none is found).
new_df['director'] = new_df['crew'].apply(extract_director)

# Reduce each list column to names only: at most 3 cast members, 15 keywords, 4 genres.
new_df['cast'] = new_df['cast'].apply(lambda x: extract_top_n_items(x, 3))
new_df['keywords'] = new_df['keywords'].apply(lambda x: extract_top_n_items(x, 15))
new_df['genres'] = new_df['genres'].apply(lambda x: extract_top_n_items(x, 4))

def clean_text_data(input_data):
  """Lower-case text and strip spaces so multi-word names become one token.

  A list is normalised element by element, a string is normalised directly,
  and any other value is replaced by an empty string.
  """
  def _squash(text):
    return text.lower().replace(" ", "")

  if isinstance(input_data, str):
    return _squash(input_data)

  if isinstance(input_data, list):
    return list(map(_squash, input_data))

  return ''

# Columns that feed the text "soup"; note that this rebinds `features`.
features = ['cast', 'keywords', 'director', 'genres']

# Normalise every feature column (lower-case, spaces removed; non-text values become '').
for feature in features:
    new_df[feature] = new_df[feature].apply(clean_text_data)

# Notebook inspection: preview the cleaned columns.
new_df[features].head()

def create_movie_soup(movie_data):
    """Concatenate a movie's tokens into one space-separated string.

    Tokens appear in the order keywords, cast, director, genres.
    """
    segments = [
        ' '.join(movie_data['keywords']),
        ' '.join(movie_data['cast']),
        movie_data['director'],
        ' '.join(movie_data['genres']),
    ]
    return ' '.join(segments)

# One soup string per movie row.
new_df['soup'] = new_df.apply(create_movie_soup, axis=1)

# Token-count matrix over the soup strings, ignoring English stop words.
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(new_df['soup'])

# Explicit CSR conversion before computing similarities.
count_matrix_sparse = csr_matrix(count_matrix)

# Pairwise cosine similarity between all rows, kept sparse (dense_output=False).
cosine_similarity2 = cosine_similarity(count_matrix_sparse, dense_output=False)

# Notebook inspection: shape of the similarity matrix.
cosine_similarity2.shape

(77638, 77638)

# Persist the similarity matrix to Drive in .npz form.
save_path = "/content/drive/MyDrive/Recommendation System/Ccosine_similarity.npz"
sparse_matrix = csr_matrix(cosine_similarity2)
save_npz(save_path, sparse_matrix)

# Reload the saved matrix (lets a fresh session skip the recomputation above).
cosine_similarity2 = load_npz("/content/drive/MyDrive/Recommendation System/Ccosine_similarity.npz")

# NOTE(review): toarray() builds a dense array; at the 77638 x 77638 shape printed
# earlier that is roughly 6e9 entries — confirm this fits in memory.
cosine_similarity2 = cosine_similarity2.toarray()

# Title -> row-position lookup used by the recommenders.
new_df = new_df.reset_index()
indices = pd.Series(new_df.index, index=new_df['title'])

# NOTE(review): new_df is replaced by the CSV copy after `indices` was built from the
# in-memory frame — confirm both have the same row order.
new_df = pd.read_csv("/content/drive/MyDrive/Recommendation System/new_df_data.csv")

# NOTE(review): pickle.load can execute arbitrary code; only load a trusted file.
with open("/content/drive/MyDrive/Recommendation System/MovieLens Metadata Datasets/best_svd1.pkl", "rb") as file:
    best_svd1 = pickle.load(file)

# Link table; the recommenders read its 'tmdbId' and 'movieId' columns.
links_df = pd.read_csv("/content/drive/MyDrive/Recommendation System/MovieLens Metadata Datasets/links_small.csv")

# Ratings table; the recommenders read its 'userId', 'movieId' and 'rating' columns.
ratings_df = pd.read_csv("/content/drive/MyDrive/Recommendation System/MovieLens Metadata Datasets/ratings_small.csv")

def weighted_rating(movie, C, m):
    """Blend a movie's own vote average with the prior mean C.

    The movie's average is weighted by v / (v + m) and C by m / (v + m),
    where v is the movie's vote count. A movie with no votes gets C.
    """
    votes = movie['vote_count']
    average = movie['vote_average']

    # Short-circuit: with v == 0 and m == 0 the formula would divide by zero.
    if votes == 0:
        return C

    own_share = votes / (votes + m) * average
    prior_share = m / (m + votes) * C
    return own_share + prior_share

def improved_hybrid_recommendations1(user_id, title, best_svd_model=best_svd1, ratings_df=ratings_df,
                           cosine_sim=cosine_similarity2, links_df=links_df, top_n=10,
                           popularity_weight=0.15, similarity_weight=0.85):
    """Recommend movies similar to ``title`` for ``user_id``.

    Candidates are the 61 movies most similar to ``title`` according to
    ``cosine_sim``. Users with no rows in ``ratings_df`` are ranked by a blend
    of weighted rating and similarity; other users are ranked by a blend of
    the model's predicted rating and the weighted rating.

    Args:
        user_id: User identifier looked up in ``ratings_df['userId']``.
        title: Seed movie title, looked up in the module-level ``indices``.
        best_svd_model: Model exposing ``predict(user_id, movie_id)``.
        ratings_df: Ratings table with 'userId', 'movieId' and 'rating' columns.
        cosine_sim: Similarity matrix indexed by row position of ``new_df``.
        links_df: Table with 'tmdbId' and 'movieId' columns.
        top_n: Maximum number of rows to return.
        popularity_weight: Weight of the weighted rating (new users only).
        similarity_weight: Weight of the similarity score (new users only).

    Returns:
        DataFrame sorted by 'final_score' descending. New users get the columns
        poster_path, title, release_date, weighted_rating, similarity_score,
        final_score; known users get poster_path, title, weighted_rating,
        predicted_rating, final_score.

    Raises:
        ValueError: If ``title`` is not present in ``indices``.
    """
    # Validate if the title exists
    index = indices.get(title, None)
    if index is None:
        raise ValueError(f"Movie title '{title}' not found in the dataset.")
    # A title shared by several rows makes the lookup return a Series; use the first match.
    if isinstance(index, pd.Series):
        index = index.iloc[0]

    # Rank all movies by similarity and drop the seed explicitly: with tied scores
    # the seed is not guaranteed to be ranked first.
    similarity_scores = np.array(cosine_sim[index])
    ranked = similarity_scores.argsort()[::-1]
    similar_movie_indices = ranked[ranked != index][:61]

    recommended_movies = new_df.iloc[similar_movie_indices][
        ['title', 'id', 'vote_count', 'vote_average', 'release_date', 'poster_path']
    ].copy()
    recommended_movies['vote_count'] = recommended_movies['vote_count'].fillna(0).astype(int)
    recommended_movies['vote_average'] = recommended_movies['vote_average'].fillna(0).astype(float)

    # Weighted rating, with the prior (C) and vote threshold (m) taken from the candidates.
    C = recommended_movies['vote_average'].mean()
    m = recommended_movies['vote_count'].quantile(0.65)
    recommended_movies['weighted_rating'] = recommended_movies.apply(lambda x: weighted_rating(x, C, m), axis=1)

    # Rows are still in candidate order, so the scores can be attached by position.
    # Merging on 'id' would multiply rows whenever two candidates share an id.
    recommended_movies['similarity_score'] = similarity_scores[similar_movie_indices]
    qualified_movies = recommended_movies.reset_index(drop=True)

    # Count user's past ratings
    user_ratings_count = int((ratings_df['userId'] == user_id).sum())

    # Cold-Start Handling: New User Recommendations
    if user_ratings_count == 0:
        print(f"User {user_id} is new. Using content and genre-based recommendations.")

        # NOTE(review): weighted_rating and similarity_score are combined without
        # rescaling — confirm both are on comparable ranges.
        qualified_movies['final_score'] = (
            (popularity_weight * qualified_movies['weighted_rating']) +
            (similarity_weight * qualified_movies['similarity_score'])
        )

        return qualified_movies.sort_values('final_score', ascending=False).head(top_n)[
            ['poster_path', 'title', 'release_date', 'weighted_rating', 'similarity_score', 'final_score']
        ]

    # Match TMDb ID to MovieLens movieId before SVD predictions
    qualified_movies = qualified_movies.merge(links_df[['tmdbId', 'movieId']], left_on='id', right_on='tmdbId', how='left')
    qualified_movies = qualified_movies.dropna(subset=['movieId'])  # Drop rows without MovieLens ID
    qualified_movies['movieId'] = qualified_movies['movieId'].astype(int)  # Convert to integer

    predicted_ratings = []
    for movie_id in qualified_movies['movieId']:
        # Surprise's predict() catches PredictionImpossible itself and reports it via
        # details['was_impossible']; the except is kept for models that do raise.
        try:
            prediction = best_svd_model.predict(user_id, movie_id)
            impossible = bool(prediction.details.get('was_impossible', False))
        except PredictionImpossible:
            prediction = None
            impossible = True

        if impossible:
            # Fall back to the movie's mean rating, or to C when it has no ratings.
            movie_ratings = ratings_df.loc[ratings_df['movieId'] == movie_id, 'rating']
            predicted_ratings.append(movie_ratings.mean() if not movie_ratings.empty else C)
        else:
            predicted_ratings.append(prediction.est)

    qualified_movies['predicted_rating'] = predicted_ratings

    # Dynamic Weighting for Hybrid Score: trust the model more for users with more ratings.
    if user_ratings_count < 10:
        svd_weight = 0.5
    elif user_ratings_count < 50:
        svd_weight = 0.6
    else:
        svd_weight = 0.7

    imdb_weight = 1 - svd_weight  # Remaining weight goes to the weighted rating
    qualified_movies['final_score'] = (
        (svd_weight * qualified_movies['predicted_rating'])
        + (imdb_weight * qualified_movies['weighted_rating'])
    )

    # Return final recommendations
    return qualified_movies.sort_values('final_score', ascending=False).head(
        min(top_n, len(qualified_movies))
    )[['poster_path', 'title', 'weighted_rating', 'predicted_rating', 'final_score']]

df = improved_hybrid_recommendations1(2000, 'Demon Slayer -Kimetsu no Yaiba- The Movie: Mugen Train',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

df = improved_hybrid_recommendations1(2000, 'John Wick: Chapter 4',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

# Same recommender, shown as the raw DataFrame (no poster rendering).
improved_hybrid_recommendations1(2000, 'Iron Man',top_n=20)

User 2000 is new. Using content and genre-based recommendations.

def _apply_recency_boost(df):
    """
    Helper function to parse release_date into a numeric year,
    then create a 0–1 scaled 'recency_score' column in df.
    """
    # Parse year from release_date
    df['year'] = pd.to_datetime(df['release_date'], errors='coerce').dt.year

    # Fallback for rows with missing/invalid release_date
    df['year'] = df['year'].fillna(df['year'].min())

    # Scale years to [0,1] range
    min_year = df['year'].min()
    max_year = df['year'].max()
    year_range = max_year - min_year if max_year != min_year else 1

    df['recency_score'] = (df['year'] - min_year) / year_range

    return df

def improved_hybrid_recommendations2(
    user_id, title, best_svd_model=best_svd1, ratings_df=ratings_df,
    cosine_sim=cosine_similarity2, links_df=links_df, top_n=10,
    popularity_weight=0.15, similarity_weight=0.85,
    recency_weight=0.2
):
    """Recommend movies similar to ``title`` for ``user_id``, favouring recent releases.

    Candidates are the 61 movies most similar to ``title`` according to
    ``cosine_sim``. Users with no rows in ``ratings_df`` are scored from
    weighted rating and similarity; other users from the model's predicted
    rating and the weighted rating. In both cases the score is then mixed with
    a 0-1 recency score using ``recency_weight``.

    Args:
        user_id: User identifier looked up in ``ratings_df['userId']``.
        title: Seed movie title, looked up in the module-level ``indices``.
        best_svd_model: Model exposing ``predict(user_id, movie_id)``.
        ratings_df: Ratings table with 'userId', 'movieId' and 'rating' columns.
        cosine_sim: Similarity matrix indexed by row position of ``new_df``.
        links_df: Table with 'tmdbId' and 'movieId' columns.
        top_n: Maximum number of rows to return.
        popularity_weight: Weight of the weighted rating (new users only).
        similarity_weight: Weight of the similarity score (new users only).
        recency_weight: Share of the final score given to the recency score.

    Returns:
        DataFrame sorted by 'final_score' descending. New users get the columns
        poster_path, title, release_date, weighted_rating, similarity_score,
        final_score; known users get poster_path, title, release_date,
        weighted_rating, predicted_rating, final_score.

    Raises:
        ValueError: If ``title`` is not present in ``indices``.
    """
    # Validate the title
    index = indices.get(title, None)
    if index is None:
        raise ValueError(f"Movie title '{title}' not found in the dataset.")
    # A title shared by several rows makes the lookup return a Series; use the first match.
    if isinstance(index, pd.Series):
        index = index.iloc[0]

    # Rank all movies by similarity and drop the seed explicitly: with tied scores
    # the seed is not guaranteed to be ranked first.
    similarity_scores = np.array(cosine_sim[index])
    ranked = similarity_scores.argsort()[::-1]
    similar_movie_indices = ranked[ranked != index][:61]

    recommended_movies = new_df.iloc[similar_movie_indices][
        ['title','id','vote_count','vote_average','release_date','poster_path']
    ].copy()
    recommended_movies['vote_count']   = recommended_movies['vote_count'].fillna(0).astype(int)
    recommended_movies['vote_average'] = recommended_movies['vote_average'].fillna(0).astype(float)

    # Weighted rating, with the prior (C) and vote threshold (m) taken from the candidates.
    C = recommended_movies['vote_average'].mean()
    m = recommended_movies['vote_count'].quantile(0.65)
    recommended_movies['weighted_rating'] = recommended_movies.apply(
        lambda x: weighted_rating(x, C, m), axis=1
    )

    # Rows are still in candidate order, so the scores can be attached by position.
    # Merging on 'id' would multiply rows whenever two candidates share an id.
    recommended_movies['similarity_score'] = similarity_scores[similar_movie_indices]
    qualified_movies = recommended_movies.reset_index(drop=True)

    user_ratings_count = int((ratings_df['userId'] == user_id).sum())

    # Cold-Start Handling (new user)
    if user_ratings_count == 0:
        print(f"User {user_id} is new. Using content and genre-based recommendations.")

        # Incorporate recency
        qualified_movies = _apply_recency_boost(qualified_movies)

        # Base score (no SVD for new user)
        base_score = (
            (popularity_weight * qualified_movies['weighted_rating'])
            + (similarity_weight * qualified_movies['similarity_score'])
        )

        # Combine recency into final_score
        qualified_movies['final_score'] = (
            base_score * (1 - recency_weight)
            + qualified_movies['recency_score'] * recency_weight
        )

        return qualified_movies\
                 .sort_values('final_score', ascending=False)\
                 .head(top_n)[['poster_path','title','release_date',
                               'weighted_rating','similarity_score','final_score']]

    # Match TMDb ID -> MovieLens movieId
    qualified_movies = qualified_movies.merge(
        links_df[['tmdbId','movieId']],
        left_on='id',
        right_on='tmdbId',
        how='left'
    )
    qualified_movies = qualified_movies.dropna(subset=['movieId'])
    qualified_movies['movieId'] = qualified_movies['movieId'].astype(int)

    predicted_ratings = []
    for movie_id in qualified_movies['movieId']:
        # Surprise's predict() catches PredictionImpossible itself and reports it via
        # details['was_impossible']; the except is kept for models that do raise.
        try:
            prediction = best_svd_model.predict(user_id, movie_id)
            impossible = bool(prediction.details.get('was_impossible', False))
        except PredictionImpossible:
            prediction = None
            impossible = True

        if impossible:
            # Fall back to the movie's mean rating, or to C when it has no ratings.
            movie_ratings = ratings_df.loc[ratings_df['movieId'] == movie_id, 'rating']
            predicted_ratings.append(movie_ratings.mean() if not movie_ratings.empty else C)
        else:
            predicted_ratings.append(prediction.est)

    qualified_movies['predicted_rating'] = predicted_ratings

    # Dynamic Weighting: trust the model more for users with more ratings.
    if user_ratings_count < 10:
        svd_weight = 0.5
    elif user_ratings_count < 50:
        svd_weight = 0.6
    else:
        svd_weight = 0.7

    imdb_weight = 1 - svd_weight
    base_score = (
        svd_weight * qualified_movies['predicted_rating']
        + imdb_weight * qualified_movies['weighted_rating']
    )

    # Recency Boost
    qualified_movies = _apply_recency_boost(qualified_movies)

    # Combine recency with final_score
    qualified_movies['final_score'] = (
        base_score * (1 - recency_weight)
        + qualified_movies['recency_score'] * recency_weight
    )

    # Sort & Return
    return qualified_movies\
             .sort_values('final_score', ascending=False)\
             .head(min(top_n, len(qualified_movies)))[
                 ['poster_path','title','release_date','weighted_rating',
                  'predicted_rating','final_score']
             ]

df = improved_hybrid_recommendations2(2000, 'Iron Man',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

df = improved_hybrid_recommendations2(2000, 'Demon Slayer: Kimetsu no Yaiba -To the Swordsmith Village-',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

df = improved_hybrid_recommendations2(2000, 'Black Panther',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

df = improved_hybrid_recommendations2(2000, 'Mission to Mars',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

df = improved_hybrid_recommendations2(2000, 'No Game No Life: Zero',top_n=60)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

!pip install tmdbv3api

Collecting tmdbv3api
  Downloading tmdbv3api-1.9.0-py3-none-any.whl.metadata (8.0 kB)
Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from tmdbv3api) (2.32.3)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->tmdbv3api) (3.4.1)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->tmdbv3api) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->tmdbv3api) (2.3.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->tmdbv3api) (2025.1.31)
Downloading tmdbv3api-1.9.0-py3-none-any.whl (25 kB)
Installing collected packages: tmdbv3api
Successfully installed tmdbv3api-1.9.0

from tmdbv3api import TMDb, Movie

tmdb = TMDb()
# NOTE(review): this key is committed in plain text and should be rotated. Supply it
# through the TMDB_API_KEY environment variable instead; the literal is kept only as
# a fallback so existing runs keep working.
tmdb.api_key = os.environ.get('TMDB_API_KEY', 'c7ec19ffdd3279641fb606d19ceb9bb1')
movie_api = Movie()

def fetch_poster_path(movie_title):
    """Look up a poster for ``movie_title`` through ``movie_api.search``.

    Returns an absolute w500 image URL built from the first search result's
    ``poster_path``. When the search yields nothing usable, or any exception
    is raised along the way (it is printed, not re-raised), a placeholder
    image URL is returned instead.
    """
    fallback_url = 'https://via.placeholder.com/300x450?text=No+Image+Available'

    try:
        matches = movie_api.search(movie_title)
        # Only the first hit is considered.
        first_poster = matches[0].poster_path if matches else None
        if first_poster:
            return f"https://image.tmdb.org/t/p/w500{first_poster}"
    except Exception as e:
        # Best effort: report the failure and fall through to the placeholder.
        print(f"Error fetching poster for {movie_title}: {e}")

    return fallback_url

# Fill only the missing posters: rows that already have a poster_path keep it, the
# rest get whatever fetch_poster_path returns for their title (an absolute URL).
new_df['poster_path'] = new_df.apply(
    lambda row: fetch_poster_path(row['title']) if pd.isnull(row['poster_path']) else row['poster_path'],
    axis=1
)

Error fetching poster for 5G - The War Against You: attribute name must be string, not 'int'
Error fetching poster for Kino-Pravda No. 17: For the First Agricultural and Cottage Industries Exhibition in the USSR: attribute name must be string, not 'int'
Error fetching poster for L'histoire d'un crime: attribute name must be string, not 'int'
Error fetching poster for Vanishing Pearls: The Oystermen of Pointe à la Hache: The resource you requested could not be found.
Error fetching poster for What a Fuck Am I Doing on This Battlefield: attribute name must be string, not 'int'
Error fetching poster for Whoregasm: attribute name must be string, not 'int'
Error fetching poster for Игра на выбывание: attribute name must be string, not 'int'

# Keep only the columns the recommenders and display cells read, and persist them
# without the index so the CSV can be reloaded as new_df / new_df2.
new_df_data = new_df[['poster_path','title','release_date','popularity', 'vote_average', 'vote_count','genres', 'id' ]]
save_path = "/content/drive/MyDrive/Recommendation System/new_df_data.csv"
new_df_data.to_csv(save_path, index=False)

# Notebook check: number of rows still lacking a poster after the back-fill.
new_df['poster_path'].isna().sum()

0

# Persist the token-count matrix so similarities can be computed on demand later.
save_path = "/content/drive/MyDrive/Recommendation System/count_matrix.npz"
save_npz(save_path, count_matrix)

# Reload the slim movie table written earlier.
new_df2 = pd.read_csv("/content/drive/MyDrive/Recommendation System/new_df_data.csv")

# Rebuild the title -> row-position lookup against new_df2 (this rebinds `indices`).
new_df2 = new_df2.reset_index()
indices = pd.Series(new_df2.index, index=new_df2['title'])

# NOTE(review): rows of count_matrix are assumed to line up with rows of new_df2 — confirm.
count_matrix = load_npz('/content/drive/MyDrive/Recommendation System/count_matrix.npz')

def get_top_similar_movies(movie_index, count_matrix=count_matrix, top_n=62):
    """Return the rows of ``count_matrix`` most similar to row ``movie_index``.

    Cosine similarity is computed between that single row and every row of
    ``count_matrix``, so the full pairwise matrix is never built.

    Returns:
        A pair ``(row_positions, scores)`` with ``top_n`` entries each, ordered
        from most to least similar.
    """
    query_row = count_matrix[movie_index]
    scores = cosine_similarity(query_row, count_matrix).flatten()

    # Descending order; the first position is skipped on the assumption that it
    # is the query movie itself.
    ranking = scores.argsort()[::-1]
    neighbours = ranking[1:top_n + 1]

    return neighbours, scores[neighbours]

def improved_hybrid_recommendations3(
    user_id, title, best_svd_model=best_svd1, new_df=new_df2,ratings_df=ratings_df,
    links_df=links_df, top_n=10,
    popularity_weight=0.15, similarity_weight=0.85,
    recency_weight=0.2
):
    """Recommend movies similar to ``title`` for ``user_id``, computing similarity on demand.

    Candidates come from ``get_top_similar_movies`` (62 rows), which compares
    only the seed row against the module-level ``count_matrix``. Users with no
    rows in ``ratings_df`` are scored from weighted rating and similarity;
    other users from the model's predicted rating and the weighted rating. In
    both cases the score is then mixed with a 0-1 recency score.

    Args:
        user_id: User identifier looked up in ``ratings_df['userId']``.
        title: Seed movie title, looked up in the module-level ``indices``.
        best_svd_model: Model exposing ``predict(user_id, movie_id)``.
        new_df: Movie table whose row positions match ``count_matrix``.
        ratings_df: Ratings table with 'userId', 'movieId' and 'rating' columns.
        links_df: Table with 'tmdbId' and 'movieId' columns.
        top_n: Maximum number of rows to return.
        popularity_weight: Weight of the weighted rating (new users only).
        similarity_weight: Weight of the similarity score (new users only).
        recency_weight: Share of the final score given to the recency score.

    Returns:
        DataFrame sorted by 'final_score' descending. New users get the columns
        poster_path, title, release_date, weighted_rating, similarity_score,
        final_score; known users get poster_path, title, release_date,
        weighted_rating, predicted_rating, final_score.

    Raises:
        ValueError: If ``title`` is not present in ``indices``.
    """
    # Validate the title
    index = indices.get(title, None)
    if index is None:
        raise ValueError(f"Movie title '{title}' not found in the dataset.")
    # A title shared by several rows makes the lookup return a Series; use the first match.
    if isinstance(index, pd.Series):
        index = index.iloc[0]

    # Get top content-based similar movies
    similar_movie_indices, similarity_scores = get_top_similar_movies(index, count_matrix, top_n=62)

    recommended_movies = new_df.iloc[similar_movie_indices][
        ['title','id','vote_count','vote_average','release_date','poster_path']
    ].copy()
    recommended_movies['vote_count']   = recommended_movies['vote_count'].fillna(0).astype(int)
    recommended_movies['vote_average'] = recommended_movies['vote_average'].fillna(0).astype(float)

    # Weighted rating, with the prior (C) and vote threshold (m) taken from the candidates.
    C = recommended_movies['vote_average'].mean()
    m = recommended_movies['vote_count'].quantile(0.65)
    recommended_movies['weighted_rating'] = recommended_movies.apply(
        lambda x: weighted_rating(x, C, m), axis=1
    )

    # Rows are still in candidate order, so the scores can be attached by position.
    # Merging on 'id' would multiply rows whenever two candidates share an id.
    recommended_movies['similarity_score'] = similarity_scores
    qualified_movies = recommended_movies.reset_index(drop=True)

    user_ratings_count = int((ratings_df['userId'] == user_id).sum())

    # Cold-Start Handling (new user)
    if user_ratings_count == 0:
        print(f"User {user_id} is new. Using content and genre-based recommendations.")

        # Incorporate recency
        qualified_movies = _apply_recency_boost(qualified_movies)

        # Base score (no SVD for new user)
        base_score = (
            (popularity_weight * qualified_movies['weighted_rating'])
            + (similarity_weight * qualified_movies['similarity_score'])
        )

        # Combine recency into final_score
        qualified_movies['final_score'] = (
            base_score * (1 - recency_weight)
            + qualified_movies['recency_score'] * recency_weight
        )

        return qualified_movies\
                 .sort_values('final_score', ascending=False)\
                 .head(top_n)[['poster_path','title','release_date',
                               'weighted_rating','similarity_score','final_score']]

    # Match TMDb ID -> MovieLens movieId
    qualified_movies = qualified_movies.merge(
        links_df[['tmdbId','movieId']],
        left_on='id',
        right_on='tmdbId',
        how='left'
    )
    qualified_movies = qualified_movies.dropna(subset=['movieId'])
    qualified_movies['movieId'] = qualified_movies['movieId'].astype(int)

    predicted_ratings = []
    for movie_id in qualified_movies['movieId']:
        # Surprise's predict() catches PredictionImpossible itself and reports it via
        # details['was_impossible']; the except is kept for models that do raise.
        try:
            prediction = best_svd_model.predict(user_id, movie_id)
            impossible = bool(prediction.details.get('was_impossible', False))
        except PredictionImpossible:
            prediction = None
            impossible = True

        if impossible:
            # Fall back to the movie's mean rating, or to C when it has no ratings.
            movie_ratings = ratings_df.loc[ratings_df['movieId'] == movie_id, 'rating']
            predicted_ratings.append(movie_ratings.mean() if not movie_ratings.empty else C)
        else:
            predicted_ratings.append(prediction.est)

    qualified_movies['predicted_rating'] = predicted_ratings

    # Dynamic Weighting: trust the model more for users with more ratings.
    if user_ratings_count < 10:
        svd_weight = 0.5
    elif user_ratings_count < 50:
        svd_weight = 0.6
    else:
        svd_weight = 0.7

    imdb_weight = 1 - svd_weight
    base_score = (
        svd_weight * qualified_movies['predicted_rating']
        + imdb_weight * qualified_movies['weighted_rating']
    )

    # Recency Boost. The helper takes the frame only; the weight is applied below.
    qualified_movies = _apply_recency_boost(qualified_movies)

    # Combine recency with final_score
    qualified_movies['final_score'] = (
        base_score * (1 - recency_weight)
        + qualified_movies['recency_score'] * recency_weight
    )

    # Sort & Return
    return qualified_movies\
             .sort_values('final_score', ascending=False)\
             .head(min(top_n, len(qualified_movies)))[
                 ['poster_path','title','release_date','weighted_rating',
                  'predicted_rating','final_score']
             ]

df = improved_hybrid_recommendations3(2000, 'Interstellar',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

df = improved_hybrid_recommendations3(2000, 'No Game No Life: Zero',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

df = improved_hybrid_recommendations3(2000, 'Mission to Mars',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

df = improved_hybrid_recommendations3(2000, 'X-Men',top_n=20)

BASE_URL = "https://image.tmdb.org/t/p/w200"
# Posters back-filled by fetch_poster_path are already absolute URLs; only relative
# paths get the base URL prepended, otherwise the <img> src would be malformed.
df['poster_path'] = df['poster_path'].apply(
    lambda x: f'<img src="{x}" width="100"/>' if str(x).startswith('http')
    else f'<img src="{BASE_URL}{x}" width="100"/>'
)

display(HTML(df.to_html(escape=False)))

User 2000 is new. Using content and genre-based recommendations.

# Start the genre recommender from a fresh copy of the saved movie table
# (this drops the 'index' column added by the earlier reset_index()).
new_df2 = pd.read_csv("/content/drive/MyDrive/Recommendation System/new_df_data.csv")

def preprocess_movies(df):
    """Return a copy of ``df`` with 'genres_list' and 'release_year' columns added.

    Args:
        df: DataFrame with 'genres' and 'release_date' columns. 'genres' cells
            may hold lists or the string form of a list.

    Returns:
        A new DataFrame; the input is left unmodified. 'genres_list' holds
        lower-cased genre names (an empty list when the cell cannot be read as
        a list) and 'release_year' the year parsed from 'release_date' (NaN
        when the date cannot be parsed).
    """
    df = df.copy()

    # Ensure genres are properly formatted as lists
    def process_genres(x):
        if isinstance(x, list):  # Already a list, just lowercase it
            return [g.lower() for g in x]
        if isinstance(x, str):  # Stored as text (e.g. after a CSV round-trip)
            try:
                # literal_eval only accepts Python literals, so text from the file
                # can never execute code, and bare names raise ValueError instead
                # of an uncaught NameError.
                genres = literal_eval(x)
            except (SyntaxError, ValueError):
                return []
            return [g.lower() for g in genres] if isinstance(genres, list) else []
        return []  # Return empty list for NaN or unexpected types

    df['genres_list'] = df['genres'].apply(process_genres)
    df['release_year'] = pd.to_datetime(df['release_date'], errors='coerce').dt.year

    return df

# Notebook inspection: preview the reloaded table.
new_df2.head()

# Add the 'genres_list' and 'release_year' columns used by the genre recommender.
new_df2 = preprocess_movies(new_df2)

# Notebook inspection: raw genres value stored for one title.
new_df2[new_df2['title']=='Interstellar']['genres']

# Module-level values read by genre_based_recommender: C is the mean vote average
# over all movies, m the 95th-percentile vote count, current_year the year at run time.
C = new_df2['vote_average'].mean()
m = new_df2['vote_count'].quantile(0.95)
current_year = dt.datetime.now().year

def genre_based_recommender(genre, df=new_df2, top_n=100):
    """Return the best-ranked movies that belong to ``genre``.

    Ranking uses, in order: the position of ``genre`` inside each movie's
    ``genres_list`` (earlier first), the age-adjusted weighted rating
    (higher first), and ``popularity`` (higher first).

    Args:
        genre: Genre name; matched case-insensitively against ``genres_list``.
        df: DataFrame holding ``genres_list``, ``release_year``,
            ``vote_count``, ``vote_average``, ``popularity``, ``poster_path``,
            ``title`` and ``release_date`` columns.
        top_n: Maximum number of rows to return.

    Returns:
        An independent DataFrame (safe for callers to modify) with the
        columns ``poster_path``, ``title`` and ``release_date``.

    Note:
        The weighted rating relies on the module-level constants ``m``, ``C``
        and ``current_year`` rather than on values derived from ``df``.
    """
    # Ensure the query uses the same (lower) case as the stored genres
    genre = genre.lower()

    # Keep only the movies tagged with the genre; copy so the columns added
    # below never touch the caller's frame
    genre_movies = df[df['genres_list'].apply(lambda genres: genre in genres)].copy()

    # Weighted rating: blend each movie's own average (R) with the global
    # mean (C), trusting R more as the vote count (v) grows relative to m
    v = genre_movies['vote_count']
    R = genre_movies['vote_average']
    genre_movies['weighted_rating'] = (v / (v + m) * R) + (m / (v + m) * C)

    # Position of the genre in the movie's list; a lower index ranks first
    genre_movies['genre_index'] = genre_movies['genres_list'].apply(lambda x: x.index(genre))

    # Age penalty: lose 2% per year of age, never dropping below 0.5.
    # NOTE(review): a release_year later than current_year gives a multiplier
    # above 1 because only the lower bound is clipped - confirm this is intended.
    genre_movies['age_penalty'] = (1 - 0.02 * (current_year - genre_movies['release_year'])).clip(lower=0.5)

    # Final quality score used for ranking
    genre_movies['adjusted_score'] = genre_movies['weighted_rating'] * genre_movies['age_penalty']

    # Sort by relevance first, then quality, then popularity
    top_movies = genre_movies.sort_values(
        by=['genre_index', 'adjusted_score', 'popularity'],
        ascending=[True, False, False]
    ).head(top_n)

    # Hand back an explicit copy: callers overwrite 'poster_path' on the
    # result, which raises SettingWithCopyWarning on a plain column slice
    return top_movies[['poster_path', 'title', 'release_date']].copy()

# Top-20 genre-based recommendations for 'Action'
df = genre_based_recommender(genre='Action', top_n=20)

# Turn each poster path into an <img> tag served from the TMDB image host
BASE_URL = "https://image.tmdb.org/t/p/w200"
df['poster_path'] = df['poster_path'].map(
    lambda poster: f'<img src="{BASE_URL}{poster}" width="100"/>'
)

# escape=False keeps the <img> markup intact so the posters render inline
display(HTML(df.to_html(escape=False)))

# Top-100 genre-based recommendations for 'Animation'
df = genre_based_recommender('Animation', top_n=100)

# Turn each poster path into an <img> tag served from the TMDB image host
BASE_URL = "https://image.tmdb.org/t/p/w200"
df['poster_path'] = df['poster_path'].map(
    lambda poster: f'<img src="{BASE_URL}{poster}" width="100"/>'
)

# escape=False keeps the <img> markup intact so the posters render inline
display(HTML(df.to_html(escape=False)))

# Top-100 genre-based recommendations for 'Adventure'
df = genre_based_recommender('Adventure', top_n=100)

# Turn each poster path into an <img> tag served from the TMDB image host
BASE_URL = "https://image.tmdb.org/t/p/w200"
df['poster_path'] = df['poster_path'].map(
    lambda poster: f'<img src="{BASE_URL}{poster}" width="100"/>'
)

# escape=False keeps the <img> markup intact so the posters render inline
display(HTML(df.to_html(escape=False)))

	adult	backdrop_path	belongs_to_collection	budget	genres	homepage	id	imdb_id	original_language	original_title	...	spoken_languages	status	tagline	title	video	vote_average	vote_count	cast	crew	keywords
0	False	/lpDEXY3BnS6j7qyYOTcfXsgLKyd.jpg	NaN	0	[{'id': 99, 'name': 'Documentary'}, {'id': 18,...	NaN	327217	tt10071790	ja	"BLOW THE NIGHT!" 夜をぶっとばせ	...	[{'english_name': 'Japanese', 'iso_639_1': 'ja...	Released	NaN	"BLOW THE NIGHT!" Let's Spend the Night Together	False	7.000	1	[{'adult': False, 'gender': 1, 'id': 2169383, ...	[{'adult': False, 'gender': 2, 'id': 82464, 'k...	[{'id': 11700, 'name': 'rock music'}, {'id': 2...
1	False	/bJjQ7T4YE5Az3sELdRuCgr94bn0.jpg	NaN	0	[{'id': 35, 'name': 'Comedy'}]	https://tubitv.com/movies/367616/1-cheerleader...	41371	tt1637976	en	#1 Cheerleader Camp	...	[{'english_name': 'English', 'iso_639_1': 'en'...	Released	Grab your spirit sticks!	#1 Cheerleader Camp	False	4.782	86	[{'adult': False, 'gender': 1, 'id': 21862, 'k...	[{'adult': False, 'gender': 2, 'id': 30053, 'k...	[{'id': 6075, 'name': 'sports'}, {'id': 2920, ...
2	False	/n4LoaH4IRimyHAHHtv9ZdJEDkHq.jpg	NaN	0	[{'id': 99, 'name': 'Documentary'}]	https://numbersintonames.wixsite.com/numbersin...	684453	tt11604612	fr	Numéro 387	...	[{'english_name': 'French', 'iso_639_1': 'fr',...	Released	NaN	#387	False	8.000	1	[]	[{'adult': False, 'gender': 1, 'id': 2146773, ...	[{'id': 2580, 'name': 'shipwreck'}, {'id': 106...
3	False	/hWpVSEMWDvG49ezvpksnS5BSnvB.jpg	NaN	6300000	[{'id': 27, 'name': 'Horror'}, {'id': 28, 'nam...	NaN	614696	tt10620868	ko	#살아있다	...	[{'english_name': 'Korean', 'iso_639_1': 'ko',...	Released	You must survive.	#Alive	False	7.285	1605	[{'adult': False, 'gender': 2, 'id': 572225, '...	[{'adult': False, 'gender': 2, 'id': 63445, 'k...	[{'id': 10685, 'name': 'escape'}, {'id': 6511,...
4	False	/pYziM5SEmptPW0LdNhWvjzR2zD1.jpg	NaN	0	[{'id': 99, 'name': 'Documentary'}, {'id': 18,...	https://www.annefrankparallelstories.com	610643	tt9850370	en	#AnneFrank. Parallel Stories	...	[{'english_name': 'English', 'iso_639_1': 'en'...	Released	NaN	#AnneFrank. Parallel Stories	False	7.106	47	[{'adult': False, 'gender': 1, 'id': 15735, 'k...	[{'adult': False, 'gender': 2, 'id': 420499, '...	[{'id': 6985, 'name': 'anne frank'}]

	cast	keywords	director	genres
0	[namietakada, kazumikawai]	[rockmusic, yankii, furyo, sukeban, non-profes...	chūseisone	[documentary, drama]
1	[charlenetilton, jaygillespie, harmonyblossom]	[sports, cheerleader, teenagegirl]	markquod	[comedy]
2	[]	[shipwreck, tragedy, drowned, migrants]	madeleineleroyer	[documentary]
3	[yooah-in, parkshin-hye, leehyun-wook]	[escape, alone, survival, drone, zombie, apart...	choil	[horror, action, adventure, thriller]
4	[helenmirren, annefrank, martinagatti]	[annefrank]	sabinafedeli	[documentary, drama, history]

	title	release_date	weighted_rating	similarity_score	final_score
8	Jujutsu Kaisen 0	2021-12-24	7.980661	0.344031	1.489526
0	Demon Slayer: Kimetsu no Yaiba -To the Swordsmith Village-	2023-02-03	7.051279	0.481543	1.467004
7	Black Clover: Sword of the Wizard King	2023-06-16	7.444158	0.346989	1.411564
1	Berserk: The Golden Age Arc I - The Egg of the King	2012-02-03	7.085522	0.404577	1.406719
42	Attack on Titan: Wings of Freedom	2015-06-27	7.630864	0.300965	1.400450
11	The Seven Deadly Sins: Prisoners of the Sky	2018-08-18	7.407840	0.334855	1.395803
16	Digimon Adventure: Last Evolution Kizuna	2020-02-21	7.432276	0.329690	1.395078
5	Made in Abyss: Dawn of the Deep Soul	2020-01-17	7.264908	0.347826	1.385388
20	Berserk: The Golden Age Arc III - The Advent	2013-02-01	7.294912	0.323029	1.368812
4	Berserk: The Golden Age Arc II - The Battle for Doldrey	2012-06-23	7.053510	0.354005	1.358931
10	Big Fish & Begonia	2016-07-08	7.146227	0.334855	1.356561
12	Fate/stay night: Heaven's Feel III. Spring Song	2020-08-15	7.116849	0.334367	1.351739
33	Made in Abyss: Wandering Twilight	2019-01-18	7.215147	0.304348	1.340968
17	Inuyasha the Movie: Affections Touching Across Time	2001-12-15	7.069889	0.323029	1.335058
13	Fate/stay night: Heaven's Feel I. Presage Flower	2017-10-14	7.002084	0.334367	1.334524
2	Fate/stay night: Heaven's Feel II. Lost Butterfly	2019-01-12	6.733633	0.376867	1.330382
19	Cardcaptor Sakura: The Movie	1999-08-21	7.035093	0.323029	1.329839
34	Attack on Titan: Chronicle	2020-07-17	7.095689	0.303433	1.322271
21	Black Butler: Book of the Atlantic	2017-01-21	6.907437	0.323029	1.310690
38	The Monkey King: Reborn	2021-04-02	6.935082	0.300965	1.296082

	title	release_date	weighted_rating	similarity_score	final_score
0	John Wick: Chapter 2	2017-02-08	7.299036	0.476731	1.500077
2	John Wick: Chapter 3 - Parabellum	2019-05-15	7.411792	0.438529	1.484518
1	SPL: Kill Zone	2005-11-18	6.521443	0.471940	1.379365
24	The Man from Nowhere	2010-08-04	7.400463	0.313050	1.376162
28	Le Samouraï	1967-10-25	7.419086	0.307794	1.374487
3	Black Rain	1989-09-22	6.484031	0.359092	1.277833
5	Kung Fu Jungle	2014-10-31	6.499267	0.353553	1.275410
43	The Fate of the Furious	2017-04-12	6.864819	0.288675	1.275097
15	Hitman's Wife's Bodyguard	2021-06-14	6.649097	0.322749	1.271701
50	Sicario: Day of the Soldado	2018-06-27	6.841049	0.286039	1.269290
13	Branded to Kill	1967-06-15	6.605237	0.322749	1.265122
29	Rumble in the Bronx	1995-01-21	6.666293	0.306970	1.260869
9	May God Save Us	2016-10-28	6.484640	0.337100	1.259231
31	Fear Over the City	1975-04-09	6.650092	0.298807	1.251500
6	The Memory of a Killer	2003-10-15	6.340690	0.346410	1.245552
23	SPL 2: A Time for Consequences	2015-06-18	6.457358	0.316228	1.237397
20	Raging Fire	2021-07-28	6.446161	0.316228	1.235718
32	Tokyo Drifter	1966-04-10	6.514277	0.298807	1.231128
7	The Fable	2019-06-21	6.209815	0.337100	1.218007
52	Safe	2012-04-16	6.447993	0.282843	1.207615

	poster_path	title	release_date	weighted_rating	similarity_score	final_score
0	/6WBeq4fCfn7AN0o21W9qNcRF2l9.jpg	Iron Man 2	2010-04-28	6.856841	0.687500	1.612901
5	/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg	Avengers: Infinity War	2018-04-25	7.790908	0.503115	1.596284
3	/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg	The Avengers	2012-04-25	7.441158	0.545705	1.580023
2	/rAGiXaUfPzY7CDEyNKUofk3Kw2e.jpg	Captain America: Civil War	2016-04-27	7.221297	0.562500	1.561319
14	/or06FN3Dka5tukK1e9sl16pB3iy.jpg	Avengers: Endgame	2019-04-24	7.749752	0.447214	1.542594
1	/qhPtAc1TKbMPqNvcdXSOn9Bn7hZ.jpg	Iron Man 3	2013-04-18	6.910474	0.573539	1.524080
6	/y4MBh0EjBlMuOzv9axM4qJlmhzz.jpg	Guardians of the Galaxy Vol. 2	2017-04-19	7.319397	0.500000	1.522909
4	/uxzzxijgPIY7slzFvMotPv8wjKA.jpg	Black Panther	2018-02-13	7.185923	0.516398	1.516827
19	/iiZZdoQBEYBv6id8su7ImL0oCbD.jpg	Spider-Man: Into the Spider-Verse	2018-12-06	7.645959	0.433013	1.514955
16	/r7vmZjiyZw9rpJMQJdXpjgiCOk9.jpg	Guardians of the Galaxy	2014-07-30	7.551120	0.437500	1.504543
9	/tVFRpFw3xTedgPGqxW0AOI8Qhh0.jpg	Captain America: The Winter Soldier	2014-03-20	7.321503	0.471405	1.498919
12	/rzRwTcFvttcN1ZpX2xv4j3tSdJu.jpg	Thor: Ragnarok	2017-10-02	7.296437	0.458831	1.484472
7	/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg	Avengers: Age of Ultron	2015-04-22	7.120346	0.485071	1.480362
8	/8YxOIPrabqkQCOKKbuxaz9IcqhO.jpg	Ant-Man	2015-07-14	6.996948	0.471405	1.450236
17	/4q2NNj4S5dG2RLF9CpXsej7yXl.jpg	Spider-Man: Far From Home	2019-06-28	7.168629	0.436436	1.446265
13	/fwBl3J2aEXru6mrr9Xg8O99Iz2K.jpg	Doctor Strange in the Multiverse of Madness	2022-05-04	7.051686	0.451848	1.441824
21	/r2J02Z2OpNTctfOSN1Ydgii51I3.jpg	Guardians of the Galaxy Vol. 3	2023-05-03	7.193039	0.426401	1.441397
11	/lFByFSLV5WDJEv3KabbdAF959F2.jpg	Eternals	2021-11-03	6.895695	0.458831	1.424361
23	/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg	Shang-Chi and the Legend of the Ten Rings	2021-09-01	7.147496	0.412479	1.422732
10	/qnqGbB22YJ7dSs4o6M7exTpNxPz.jpg	Ant-Man and the Wasp: Quantumania	2023-02-15	6.773072	0.471405	1.416655

Applying The Improved Hybrid Recommendation Algorithm on a Larger Dataset¶

About The Dataset:¶

Metadata Datasets Overview¶

1.`movies_metadata.csv`¶

2.`credits.csv`¶

3.`keywords.csv`¶

Imports¶

Loading data¶

Data Preprocessing¶

Getting The Recommendation Algorithms¶

`IMDb` Popularity Function¶

Our Recommendation Function¶

Let's make some improvements so that modern movies come first¶

Fixing missing movie posters¶

Let's Do Some Optimization, As the Memory Cost Is Very High¶

Building Genre-Based Recommendation System¶

Preprocessing function (run once)¶

Constants computed once globally¶

Genre-Based Recommendation Function¶

	title	release_date	weighted_rating	similarity_score	final_score
5	Avengers: Infinity War	2018-04-25	7.815896	0.503115	1.436548
14	Avengers: Endgame	2019-04-24	7.776584	0.447214	1.402513
2	Captain America: Civil War	2016-04-27	7.239841	0.562500	1.390411
0	Iron Man 2	2010-04-28	6.869009	0.687500	1.378738
18	Spider-Man: Into the Spider-Verse	2018-12-06	7.679748	0.433013	1.372540
4	Black Panther	2018-02-13	7.204046	0.516398	1.372158
3	The Avengers	2012-04-25	7.459850	0.545705	1.370609
6	Guardians of the Galaxy Vol. 2	2017-04-19	7.340526	0.500000	1.368689
20	Guardians of the Galaxy Vol. 3	2023-05-03	7.226457	0.426401	1.357128
13	Doctor Strange in the Multiverse of Madness	2022-05-04	7.076467	0.451848	1.347737
12	Thor: Ragnarok	2017-10-02	7.317621	0.458831	1.337946
9	Ant-Man and the Wasp: Quantumania	2023-02-15	6.792757	0.471405	1.335686
1	Iron Man 3	2013-04-18	6.923232	0.573539	1.333838
15	Guardians of the Galaxy	2014-07-30	7.572827	0.437500	1.327978
17	Spider-Man: Far From Home	2019-06-28	7.190530	0.436436	1.324857
11	Eternals	2021-11-03	6.916150	0.458831	1.324552
23	Shang-Chi and the Legend of the Ten Rings	2021-09-01	7.174687	0.412479	1.324057
10	Captain America: The Winter Soldier	2014-03-20	7.344475	0.471405	1.323631
7	Avengers: Age of Ultron	2015-04-22	7.136847	0.485071	1.316705
24	Black Widow	2021-07-07	7.070013	0.401478	1.304015

	title	release_date	weighted_rating	similarity_score	final_score
1	Guardians of the Galaxy Vol. 2	2017-04-19	7.329954	0.580948	1.446732
0	Captain America: Civil War	2016-04-27	7.232182	0.580948	1.430348
22	Avengers: Infinity War	2018-04-25	7.796863	0.461880	1.426446
2	The Avengers	2012-04-25	7.448900	0.563602	1.425954
9	Guardians of the Galaxy	2014-07-30	7.558680	0.516398	1.416332
8	Thor: Ragnarok	2017-10-02	7.307414	0.533114	1.411500
4	Captain America: The Winter Soldier	2014-03-20	7.332917	0.547723	1.410541
7	Doctor Strange in the Multiverse of Madness	2022-05-04	7.070898	0.533333	1.406523
15	Guardians of the Galaxy Vol. 3	2023-05-03	7.213164	0.495434	1.402475
6	Black Panther: Wakanda Forever	2022-11-09	6.988542	0.538382	1.400074
23	Spider-Man: Into the Spider-Verse	2018-12-06	7.656001	0.447214	1.399570
5	Ant-Man and the Wasp: Quantumania	2023-02-15	6.799993	0.547723	1.388450
13	Spider-Man: Far From Home	2019-06-28	7.182586	0.507093	1.388129
33	Avengers: Endgame	2019-04-24	7.756572	0.404145	1.387003
17	Shang-Chi and the Legend of the Ten Rings	2021-09-01	7.165308	0.486864	1.381603
3	Ant-Man	2015-07-14	7.010420	0.547723	1.376492
12	Iron Man	2008-04-30	7.375555	0.516398	1.366450
14	Avengers: Age of Ultron	2015-04-22	7.131848	0.500979	1.359278
21	Doctor Strange	2016-10-25	7.220261	0.461880	1.347952
25	Deadpool	2016-02-09	7.377868	0.429669	1.344961

	title	release_date	weighted_rating	similarity_score	final_score
12	The Martian	2015-09-30	7.669214	0.308607	1.317658
56	Doctor Who: The Day of the Doctor	2013-11-23	7.793803	0.250000	1.289185
45	Alien	1979-05-25	8.176204	0.257172	1.279236
18	Star Trek Into Darkness	2013-05-05	7.304323	0.285831	1.254812
51	2001: A Space Odyssey	1968-04-02	8.051635	0.251259	1.240624
44	Gravity	2013-10-03	7.148622	0.261488	1.219575
7	Life	2017-03-22	6.411599	0.324443	1.181084
30	Oblivion	2013-04-10	6.611149	0.277778	1.166155
3	Stowaway	2021-06-24	5.893868	0.340207	1.136819
52	Prometheus	2012-05-30	6.530443	0.251259	1.136652
61	Moonfall	2022-02-03	6.316368	0.245737	1.125065
28	Zathura: A Space Adventure	2005-11-06	6.378510	0.277778	1.123953
16	Alien: Covenant	2017-05-09	6.091512	0.294884	1.122574
9	A Space Program	2015-03-18	5.559872	0.314270	1.068388
1	Journey to the Far Side of the Sun	1969-08-27	5.839734	0.372678	1.059546
21	Dédalo	2013-09-11	5.617851	0.284268	1.051373
33	Solaris	2002-11-27	5.857232	0.270369	1.051005
38	Welcome to the Space Show	2010-02-18	5.773963	0.261488	1.049259
6	SpaceCamp	1986-06-06	5.726536	0.326860	1.045164
31	After the World Ended	2015-01-08	5.589861	0.272166	1.043356

	title	release_date	weighted_rating	similarity_score	final_score
1	I Want to Eat Your Pancreas	2018-09-01	8.147662	0.377964	1.411878
55	Your Name.	2016-08-26	8.487554	0.273009	1.369867
12	Rascal Does Not Dream of a Dreaming Girl	2019-06-15	7.941004	0.317554	1.351714
56	A Whisker Away	2020-06-18	7.811582	0.273009	1.311608
4	Fate/stay night: Heaven's Feel III. Spring Song	2020-08-15	7.324444	0.349927	1.305455
25	Sword Art Online the Movie -Progressive- Aria of a Starless Night	2021-10-30	7.490084	0.292770	1.292179
10	Is It Wrong to Try to Pick Up Girls in a Dungeon?: Arrow of the Orion	2019-02-15	7.393010	0.325669	1.291474
5	Fate/stay night: Heaven's Feel I. Presage Flower	2017-10-14	7.144321	0.349927	1.266698
46	The Last: Naruto the Movie	2014-12-06	7.700063	0.276026	1.265991
15	Evangelion: 2.0 You Can (Not) Advance	2009-06-26	7.696711	0.311805	1.261347
11	Neon Genesis Evangelion: The End of Evangelion	1997-07-19	8.226797	0.318511	1.260946
30	The Tunnel to Summer, the Exit of Goodbyes	2022-09-09	7.176039	0.291606	1.259417
7	I've Always Liked You	2016-04-23	7.129991	0.328976	1.245017
29	Mobile Suit Gundam Hathaway	2021-06-11	7.004527	0.291606	1.233121
6	Fate/stay night: Heaven's Feel II. Lost Butterfly	2019-01-12	6.800385	0.338062	1.228785
14	Trinity Seven: Eternity Library & Alchemic Girl	2017-02-01	6.875869	0.314970	1.210713
3	Saekano the Movie: Finale	2019-10-26	6.552441	0.349927	1.207101
26	anohana: The Flower We Saw That Day - The Movie	2012-06-07	7.132431	0.292770	1.197832
0	The Irregular at Magic High School: The Girl Who Summons the Stars	2017-06-17	6.410279	0.377964	1.197678
19	Poupelle of Chimney Town	2020-12-25	6.649672	0.302614	1.192309
34	Princess Principal Crown Handler: Chapter 2	2021-09-23	6.631144	0.290957	1.187874
59	Giovanni's Island	2014-02-22	7.054334	0.272772	1.186291
9	A Certain Magical Index: The Miracle of Endymion	2013-02-23	6.761595	0.327327	1.182545
17	Girls und Panzer: The Movie	2015-11-21	6.722488	0.308607	1.176551
36	Noblesse: Awakening	2016-02-04	6.727780	0.281718	1.164616
31	Detective Conan: Dimensional Sniper	2014-04-19	6.740906	0.290957	1.161045
50	The Garden of Sinners: Paradox Spiral	2008-08-16	7.091925	0.276026	1.158729
38	Harmony	2015-11-13	6.709741	0.279145	1.154988
43	Digimon Adventure tri. Part 6: Future	2018-05-05	6.576993	0.276026	1.154080
20	Mardock Scramble: The First Compression	2010-11-06	6.733573	0.302614	1.145235
37	How I Live Now	2013-09-10	6.649797	0.281718	1.138115
33	Orion	2015-08-01	6.478689	0.290957	1.135294
45	BLAME!	2017-05-20	6.441799	0.276026	1.132142
53	The Relative Worlds	2019-01-25	6.307426	0.276026	1.127446
60	Expelled from Paradise	2014-11-15	6.536257	0.272772	1.124122
58	My Love	2006-08-27	6.915350	0.272772	1.123899
48	Ruin	2011-03-01	6.641896	0.276026	1.121868
22	Fist of the North Star: The Legend of Toki	2008-03-26	6.602949	0.302614	1.118131
18	Bitter-Sweet	2009-04-14	6.521129	0.308607	1.118102
24	Fist of the North Star: The Legend of Kenshiro	2008-10-11	6.599812	0.302614	1.117755
32	The Garden of Sinners: A Study in Murder (Part 1)	2007-12-29	6.680561	0.290957	1.113804
13	Fist of the North Star: Legend of Raoh - Chapter of Fierce Fight	2007-04-28	6.537388	0.314970	1.112952
57	Psychic School Wars	2012-11-09	6.520036	0.273009	1.110908
16	Room 314	2006-01-01	6.483822	0.308607	1.096483
23	Fist of the North Star: The Legend of Yuria	2007-02-23	6.468288	0.302614	1.096258
49	The Garden of Sinners: Overlooking View	2007-12-01	6.605832	0.276026	1.094683
27	Banner of the Stars III	2005-08-06	6.566387	0.292770	1.089907
41	Hostile	2018-03-08	5.967144	0.276026	1.080898
44	Pale Cocoon	2005-10-29	6.533763	0.276026	1.074607
40	Lakeer	2004-05-14	6.543547	0.276026	1.070066
47	Gensomaden Saiyuki Requiem: For the One Not Chosen	2001-08-18	6.510409	0.276026	1.048947
51	Martian Successor Nadesico: The Motion Picture - Prince of Darkness	1998-08-01	6.585409	0.276026	1.040804
8	Other Worlds	1999-01-02	6.223913	0.327327	1.038023
42	Gangland	2001-01-01	6.415279	0.276026	1.037531
54	Roja	1992-08-15	6.756623	0.276026	1.027064
2	A Wind Named Amnesia	1990-12-22	6.333610	0.363137	1.024109
52	O Pioneers!	1992-02-02	6.521129	0.276026	0.998805
39	Riki-Oh: The Wall of Hell	1989-06-25	6.641249	0.279145	0.998197
35	Riki-Oh 2: Child of Destruction	1990-08-24	6.479964	0.285714	0.989024
28	Zeitoun	NaN	6.566387	0.292770	0.987050

	title	release_date	weighted_rating	similarity_score	final_score
0	The Martian	2015-09-30	7.650826	0.279145	1.292864
27	Mad Max: Fury Road	2015-05-13	7.558228	0.238366	1.254022
3	Moon	2009-06-12	7.470669	0.266501	1.249744
26	Blade Runner 2049	2017-10-04	7.458145	0.238366	1.246313
35	Contact	1997-07-11	7.306172	0.227273	1.177523
51	The Space Between Us	2017-01-26	6.933072	0.220193	1.170947
18	Cloud Atlas	2012-10-26	6.839974	0.244558	1.165591
54	Divergent	2014-03-14	6.897041	0.220193	1.160172
19	Doctor Who: Last Christmas	2014-12-25	6.707483	0.241747	1.152082
29	Frequency	2000-04-28	6.988156	0.232621	1.149449
32	Chappie	2015-03-04	6.742454	0.227921	1.149027
48	A.I. Artificial Intelligence	2001-06-29	6.984215	0.222277	1.144093
47	Planet of the Apes	1968-02-07	7.435439	0.222277	1.127272
16	Elysium	2013-08-07	6.483863	0.246183	1.126113
36	The Matrix Resurrections	2021-12-16	6.446276	0.227273	1.125948
1	Origin: Spirits of the Past	2006-01-07	6.426964	0.269680	1.120209
21	Vesper	2022-08-17	6.297131	0.241747	1.120044
55	The Matrix Revolutions	2003-11-05	6.690814	0.217597	1.110004
2	LX 2048	2020-09-25	6.034968	0.269680	1.103277
5	Aniara	2019-02-01	6.093696	0.257130	1.099640

	title	release_date	weighted_rating	similarity_score	final_score
1	I Want to Eat Your Pancreas	2018-09-01	8.147464	0.377964	1.411182
55	Your Name.	2016-08-26	8.487527	0.273009	1.368856
12	Rascal Does Not Dream of a Dreaming Girl	2019-06-15	7.940492	0.317554	1.351149
56	A Whisker Away	2020-06-18	7.811368	0.273009	1.311246
3	Fate/stay night: Heaven's Feel III. Spring Song	2020-08-15	7.323262	0.349927	1.304977
26	Sword Art Online the Movie -Progressive- Aria of a Starless Night	2021-10-30	7.489241	0.292770	1.291910
10	Is It Wrong to Try to Pick Up Girls in a Dungeon?: Arrow of the Orion	2019-02-15	7.392057	0.325669	1.290855
4	Fate/stay night: Heaven's Feel I. Presage Flower	2017-10-14	7.143332	0.349927	1.265738
42	The Last: Naruto the Movie	2014-12-06	7.699904	0.276026	1.264627
29	The Tunnel to Summer, the Exit of Goodbyes	2022-09-09	7.174288	0.291606	1.259207
15	Evangelion: 2.0 You Can (Not) Advance	2009-06-26	7.696375	0.311805	1.259122
11	Neon Genesis Evangelion: The End of Evangelion	1997-07-19	8.226603	0.318511	1.256721
7	I've Always Liked You	2016-04-23	7.128704	0.328976	1.243854
30	Mobile Suit Gundam Hathaway	2021-06-11	7.003073	0.291606	1.232778
6	Fate/stay night: Heaven's Feel II. Lost Butterfly	2019-01-12	6.799422	0.338062	1.228166
14	Trinity Seven: Eternity Library & Alchemic Girl	2017-02-01	6.873823	0.314970	1.209627
5	Saekano the Movie: Finale	2019-10-26	6.549893	0.349927	1.206291
0	The Irregular at Magic High School: The Girl Who Summons the Stars	2017-06-17	6.408428	0.377964	1.196615
25	anohana: The Flower We Saw That Day - The Movie	2012-06-07	7.131219	0.292770	1.196006
23	Poupelle of Chimney Town	2020-12-25	6.647596	0.302614	1.191724

	title	release_date	weighted_rating	similarity_score	final_score
0	X2	2003-04-27	6.704621	0.919866	1.575269
1	X-Men: The Last Stand	2006-05-24	6.400858	0.725241	1.414691
4	X-Men: Days of Future Past	2014-05-15	7.091564	0.553372	1.402623
10	Spider-Man: Into the Spider-Verse	2018-12-06	7.624969	0.435194	1.397230
11	Logan	2017-02-28	7.333658	0.430820	1.356558
2	The Wolverine	2013-07-23	6.396853	0.603023	1.350280
5	X-Men: First Class	2011-06-01	6.910187	0.511891	1.344431
3	X-Men: Apocalypse	2016-05-18	6.462246	0.568535	1.342895
32	Guardians of the Galaxy Vol. 2	2017-04-19	7.245288	0.376889	1.309281
34	The Avengers	2012-04-25	7.400639	0.365636	1.306572
20	Deadpool 2	2018-05-10	7.107797	0.389249	1.303927
21	Black Panther	2018-02-13	7.090476	0.389249	1.301848
30	Captain America: Civil War	2016-04-27	7.133394	0.376889	1.293114
28	Iron Man	2008-04-30	7.309506	0.376889	1.292329
48	Captain America: The Winter Soldier	2014-03-20	7.240436	0.355335	1.285822
60	Thor: Ragnarok	2017-10-02	7.216046	0.345857	1.284670
7	Batman and Harley Quinn	2017-08-14	6.378469	0.476731	1.273155
36	Avengers: Age of Ultron	2015-04-22	7.016112	0.365636	1.268648
15	The Flash	2023-06-13	6.511506	0.415029	1.263600
6	X-Men Origins: Wolverine	2009-04-28	6.321851	0.497468	1.258544

	poster_path	title	release_date	popularity	vote_average	vote_count	genres	id
0	/vZKottmj32FAQVdWEcK01TcgNSU.jpg	"BLOW THE NIGHT!" Let's Spend the Night Together	1983-03-19	3.187	7.000	1	['documentary', 'drama']	327217
1	/lWD6BvbuRPuipIieOtiXcsEzfrB.jpg	#1 Cheerleader Camp	2010-07-27	7.730	4.782	86	['comedy']	41371
2	/lLFA42mBVpQ0NZDg8ONv9a0onKm.jpg	#387	2020-03-16	0.888	8.000	1	['documentary']	684453
3	/zqf711LsnQ5CcW3rISFw2t7OYzb.jpg	#Alive	2020-06-24	26.284	7.285	1605	['horror', 'action', 'adventure', 'thriller']	614696
4	/hkC4yNDFmW1yQuQhtZydMeRuaAb.jpg	#AnneFrank. Parallel Stories	2019-10-18	6.385	7.106	47	['documentary', 'drama', 'history']	610643

	title	release_date
71402	Top Gun: Maverick	2022-05-24
77186	Zack Snyder's Justice League	2021-03-18
53227	Spider-Man: No Way Home	2021-12-15
30979	John Wick: Chapter 4	2023-03-22
41695	Nobody	2021-03-18
53226	Spider-Man: Into the Spider-Verse	2018-12-06
20063	Everything Everywhere All at Once	2022-03-24
16659	Demon Slayer -Kimetsu no Yaiba- The Movie: Mugen Train	2020-10-16
52933	Sound of Freedom	2023-07-03
24193	Godzilla vs. Kong	2021-03-24
38553	Mission: Impossible - Dead Reckoning Part One	2023-07-08
71758	Transformers: Rise of the Beasts	2023-06-06
52804	Sonic the Hedgehog 2	2022-03-30
50867	Shang-Chi and the Legend of the Ten Rings	2021-09-01
10926	Bullet Train	2022-08-03
67999	The Suicide Squad	2021-07-28
20791	Fast X	2023-05-17
20279	Extraction 2	2023-06-09
60822	The Gentlemen	2020-01-01
69600	The Woman King	2022-09-16

	title	release_date
53224	Spider-Man: Across the Spider-Verse	2023-05-31
68044	The Super Mario Bros. Movie	2023-04-05
52893	Soul	2020-12-25
24939	Guillermo del Toro's Pinocchio	2022-11-09
35888	Luca	2021-06-17
47161	Raya and the Last Dragon	2021-03-03
19268	Elemental	2023-06-14
18284	Dragon Ball Super: Super Hero	2022-06-11
51684	Sing 2	2021-12-01
19522	Encanto	2021-10-13
32579	Klaus	2019-11-08
64456	The Mitchells vs. the Machines	2021-04-22
72257	Turning Red	2022-03-10
35907	Luck	2022-08-05
48608	Ron's Gone Wrong	2021-10-14
27751	How to Train Your Dragon: The Hidden World	2019-01-03
38336	Minions: The Rise of Gru	2022-06-29
56912	The Bad Guys	2022-03-17
31253	Jujutsu Kaisen 0	2021-12-24
31504	Justice League Dark: Apokolips War	2020-05-05
39138	Mortal Kombat Legends: Scorpion's Revenge	2020-04-12
76125	Wish Dragon	2021-01-15
2467	A Silent Voice: The Movie	2016-09-17
54913	Suzume	2022-11-11
58863	The Croods: A New Age	2020-11-25
74733	Weathering with You	2019-06-19
41413	Nimona	2023-06-23
57696	The Boss Baby: Family Business	2021-07-01
76303	Wolfwalkers	2020-10-26
66903	The Sea Beast	2022-06-24
34461	Lightyear	2022-06-15
19935	Evangelion: 3.0+1.0 Thrice Upon a Time	2021-03-08
2835	A Whisker Away	2020-06-18
15156	DC League of Super-Pets	2022-07-27
53241	Spies in Disguise	2019-12-04
48820	Ruby Gillman, Teenage Kraken	2023-06-28
40096	My Hero Academia: Heroes Rising	2019-12-20
28421	I Want to Eat Your Pancreas	2018-09-01
77414	Zootopia	2016-02-11
29571	Inside Out	2015-06-09
57760	The Boy, the Mole, the Fox and the Horse	2022-12-25
35864	Loving Vincent	2017-06-22
27444	Hotel Transylvania: Transformania	2022-02-25
43703	PAW Patrol: The Movie	2021-08-09
69556	The Witcher: Nightmare of the Wolf	2021-08-23
6857	Back to the Outback	2021-12-03
66147	The Quintessential Quintuplets Movie	2022-05-20
42907	One Piece Film Red	2022-08-06
8075	Belle	2021-07-16
37406	Mavka: The Forest Song	2023-03-02
38389	Miraculous World: Shanghai – The Legend of Ladydragon	2021-05-15
27750	How to Train Your Dragon: Homecoming	2019-10-25
12789	Chip 'n Dale: Rescue Rangers	2022-05-20
74611	We Bare Bears: The Movie	2020-06-30
61912	The House	2022-01-14
8980	Black Clover: Sword of the Wizard King	2023-06-16
47440	Red Shoes and the Seven Dwarfs	2019-07-25
36965	Marcel the Shell with Shoes On	2022-06-24
29496	Injustice	2021-10-09
41039	New Gods: Nezha Reborn	2021-02-06
13105	Ciao Alberto	2021-11-12
62111	The Ice Age Adventures of Buck Wild	2022-01-28
39603	Mummies	2023-01-05
73953	Vivo	2021-07-30
56354	The Addams Family 2	2021-10-01
49946	Scoob!	2020-07-08
43622	Over the Moon	2020-10-16
28275	I Lost My Body	2019-11-06
7480	Batman: The Long Halloween, Part One	2021-06-21
32795	Kubo and the Two Strings	2016-08-18
67108	The Seven Deadly Sins: Cursed by Light	2021-07-02
20708	Far from the Tree	2021-11-24
19661	Entergalactic	2022-09-28
31512	Justice Society: World War II	2021-04-27
31090	Josee, the Tiger and the Fish	2020-12-25
40097	My Hero Academia: Two Heroes	2018-08-03
40098	My Hero Academia: World Heroes' Mission	2021-08-06
39137	Mortal Kombat Legends: Battle of the Realms	2021-08-31
54049	Straight Outta Nowhere: Scooby-Doo! Meets Courage the Cowardly Dog	2021-09-14
24749	Green Snake	2021-07-23
57749	The Boy and the Heron	2023-07-14
73827	Violet Evergarden: The Movie	2020-09-18
23961	Given	2020-08-22
7464	Batman and Superman: Battle of the Super Sons	2022-10-17
7481	Batman: The Long Halloween, Part Two	2021-07-26
55256	Tad, the Lost Explorer and the Emerald Tablet	2022-08-24
56172	That Time I Got Reincarnated as a Slime the Movie: Scarlet Bond	2022-11-25
63287	The Lego Batman Movie	2017-02-08
47082	Rascal Does Not Dream of a Dreaming Girl	2019-06-15
7245	Bao	2018-06-15
68013	The Summit of the Gods	2021-09-22
18457	Drifting Home	2022-09-09
56630	The Angry Birds Movie 2	2019-08-02
31558	KONOSUBA – God's blessing on this wonderful world! Legend of Crimson	2019-08-30
53273	Spirit Untamed	2021-05-20
73186	Us Again	2021-03-05
60378	The First Slam Dunk	2022-12-03
71221	Tom and Jerry Cowboy Up!	2022-01-24
42918	One Piece: Stampede	2019-08-09
52983	South Park: Post COVID: The Return of COVID	2021-12-16

	title	release_date
46481	Puss in Boots: The Last Wish	2022-12-07
6552	Avengers: Endgame	2019-04-24
6553	Avengers: Infinity War	2018-04-25
18644	Dungeons & Dragons: Honor Among Thieves	2023-03-23
19634	Enola Holmes 2	2022-11-04
29680	Interstellar	2014-11-05
71134	Togo	2019-12-20
58036	The Call of the Wild	2020-02-19
29985	Isle of Dogs	2018-03-23
47201	Ready Player One	2018-03-28
56352	The Adam Project	2022-03-11
19633	Enola Holmes	2020-09-23
11628	Captain Fantastic	2016-07-08
31397	Jurassic World Dominion	2022-06-01
29396	Indiana Jones and the Dial of Destiny	2023-06-28
68384	The Three Musketeers: D'Artagnan	2023-04-05
71197	Tom Clancy's Without Remorse	2021-04-29
38643	Moana	2016-10-13
1514	A Dog's Purpose	2017-01-19
11615	Captain America: Civil War	2016-04-27
22493	Freaks Out	2021-10-28
31298	Jumanji: The Next Level	2019-12-04
42427	Okja	2017-06-28
13032	Christopher Robin	2018-08-02
8630	Big Hero 6	2014-10-24
65485	The Peanut Butter Falcon	2019-08-09
66997	The Secret Life of Pets 2	2019-05-24
43759	Paddington 2	2017-11-09
61373	The Green Knight	2021-07-29
27891	Hunt for the Wilderpeople	2016-03-31
72109	True Spirit	2023-01-26
73683	Vicky and Her Mystery	2021-12-15
53592	Star Wars: The Force Awakens	2015-12-15
13087	Chupa	2023-04-07
2965	AINBO: Spirit of the Amazon	2021-02-09
58933	The Curse of Bridge Hollow	2022-10-14
9636	Blue Miracle	2021-05-27
21334	Finding Dory	2016-06-16
71929	Trick or Treat Scooby-Doo!	2022-10-04
31299	Jumanji: Welcome to the Jungle	2017-12-09

Applying The Improved Hybrid Recommendation Algorithm on a Larger Dataset¶

About The Dataset:¶

Metadata Datasets Overview¶

1.movies_metadata.csv¶

2.credits.csv¶

3.keywords.csv¶

Imports¶

Loading data¶

Data Preprocessing¶

Getting The Recommendation Algorithms¶

IMDb Popularity Function¶

Our Recommendation Function¶

Let's make some improvements so that modern movies come first¶

Fixing missing movie posters¶

Let's Do Some Optimization, As the Memory Cost Is Very High¶

Building Genre-Based Recommendation System¶

Preprocessing function (run once)¶

Constants computed once globally¶

Genre-Based Recommendation Function¶

1.`movies_metadata.csv`¶

2.`credits.csv`¶

3.`keywords.csv`¶

`IMDb` Popularity Function¶