Video Embedding Clustering with Feluda

This notebook demonstrates how to use Feluda to extract video embeddings and group them either into a fixed number (N) of clusters or into an automatically determined number of clusters. It includes:

  • Setting up Feluda and its operators.

  • Using video data from a subset of the UCF101 video dataset. We use it here for the demo, but it can be replaced with any video dataset.

  • Generating video embeddings using Feluda’s CLIP video operator.

  • Using Feluda’s clustering operator to cluster videos.

  • Visualizing the clusters with video thumbnails.

GitHub Open In Colab

Install dependencies conditionally based on whether the notebook is running in Colab or locally.

%%time
import sys

# The environment is detected by checking whether the "google.colab" module
# is already present among the imported modules.
IN_COLAB = "google.colab" in sys.modules
print("Running Notebook in Google Colab" if IN_COLAB else "Running Notebook locally")

if IN_COLAB:
    # Since Google Colab has preinstalled libraries like tensorflow and numba, we create a folder called feluda_custom_venv and isolate the environment there.
    # This is done to avoid any conflicts with the preinstalled libraries.
    %pip install uv
    !mkdir -p /content/feluda_custom_venv
    # stdout and stderr of the install command are redirected to /dev/null,
    # so installation problems will not be reported by this line.
    !uv pip install --target=/content/feluda_custom_venv --prerelease allow feluda feluda-vid-vec-rep-clip feluda-cluster-embeddings opencv-python matplotlib > /dev/null 2>&1

    # Insert the isolated folder at the front of the import path so that it
    # is searched before the other entries.
    sys.path.insert(0, "/content/feluda_custom_venv")
else:
    # Local run: install into the environment uv resolves, with the install
    # command's stdout and stderr redirected to /dev/null as above.
    !uv pip install feluda feluda-vid-vec-rep-clip feluda-cluster-embeddings opencv-python matplotlib > /dev/null 2>&1
Running Notebook locally
Using Python 3.10.12 environment at: /home/aatman/Aatman/Tattle/feluda/.venv
Audited 6 packages in 11ms
CPU times: user 6.38 ms, sys: 4.13 ms, total: 10.5 ms
Wall time: 138 ms
import os
import tarfile
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
from huggingface_hub import hf_hub_download
from tqdm.notebook import tqdm

from feluda.factory import VideoFactory

We’ll use two operators for this example: one for extracting embeddings and the other for clustering.

from feluda.operators import ClusterEmbeddings, VidVecRep

# Operator whose run() receives the collected embeddings and a cluster count
# in the clustering step further down.
cluster_operator = ClusterEmbeddings()
# Operator whose run() is called once per video to obtain its embedding.
vid_vec_clip_operator = VidVecRep()

Data Preparation

# Folder (relative to the working directory) that holds one sub-folder per
# class once the archive has been extracted.
dataset_name = "UCF101_subset/train"
hf_dataset_identifier = "sayakpaul/ucf101-subset"
filename = "UCF101_subset.tar.gz"

# Download and extract the UCF101 subset dataset
print("Downloading and extracting dataset...")
file_path = hf_hub_download(
    repo_id=hf_dataset_identifier, filename=filename, repo_type="dataset"
)

with tarfile.open(file_path) as t:
    # The archive comes from the network, so extract it with the "data"
    # filter, which rejects members that would escape the target directory.
    # Extraction filters only exist on Python versions that define
    # tarfile.data_filter; older interpreters keep the unfiltered call.
    if hasattr(tarfile, "data_filter"):
        t.extractall(".", filter="data")
    else:
        t.extractall(".")
print(f"Dataset extracted to {dataset_name}")
Downloading and extracting dataset...
Dataset extracted to UCF101_subset/train
/var/folders/4p/bw6h5x8x1nb_17vsgfc12dz00000gn/T/ipykernel_40587/954250746.py:12: DeprecationWarning: Python 3.14 will, by default, filter extracted tar archives and reject files or modify their metadata. Use the filter argument to control this behavior.
  t.extractall(".")
# Folder in the working directory that collects one thumbnail per video;
# re-running the cell is harmless because an existing folder is accepted.
thumbnail_dir = Path("thumbnails")
thumbnail_dir.mkdir(parents=False, exist_ok=True)
def get_video_thumbnail(
    video_path: str, save_path: str | os.PathLike[str]
) -> str | None:
    """Extract and save the first frame from the video as a thumbnail.

    The thumbnail is written as ``<video name without extension>_thumbnail.jpg``
    inside ``save_path``. For ``.avi`` files this is the same name as
    replacing ``.avi`` with ``_thumbnail.jpg``.

    Args:
        video_path: Path to the video file
        save_path: Directory to save the thumbnail

    Returns:
        Path to the saved thumbnail, or None if the first frame could not be
        read or the image could not be written

    """
    cap = cv2.VideoCapture(video_path)
    try:
        ret, frame = cap.read()  # Read the first frame
    finally:
        # Release the capture handle on every path, including errors.
        cap.release()

    if not ret:
        return None

    # Derive the name from the stem so any container extension works, not
    # only ".avi".
    stem = os.path.splitext(os.path.basename(video_path))[0]
    thumbnail_path = os.path.join(save_path, f"{stem}_thumbnail.jpg")

    # imwrite reports failure through its boolean result; do not hand back a
    # path to a file that was never written.
    if not cv2.imwrite(thumbnail_path, frame):
        return None
    return thumbnail_path

Video Processing and Embedding Extraction

print("Processing videos and extracting embeddings...")
operator_parameters = []
max_videos_per_class = 5  # Upper bound on videos embedded per class

# Keep only real class folders (a stray file would make os.listdir fail
# below) and sort them, because os.listdir returns entries in arbitrary order.
sub_folder_name = sorted(
    entry
    for entry in os.listdir(f"{dataset_name}")
    if os.path.isdir(os.path.join(dataset_name, entry))
)

# Process videos from each class directory
for class_dir in tqdm(sub_folder_name, desc="Processing classes"):
    # Sorted so the same videos are selected on every run.
    temp_list = sorted(os.listdir(os.path.join(dataset_name, class_dir)))

    # Process up to max_videos_per_class videos per class
    video_count = 0
    for video_file in temp_list:
        if video_count >= max_videos_per_class:
            break  # Cap reached; no need to scan the rest of the folder
        if video_file == "UCF101":
            # NOTE(review): presumably a non-video entry shipped with the
            # archive; confirm why it is excluded.
            continue

        video_full_path = os.path.join(dataset_name, class_dir, video_file)

        # Best effort: a video that cannot be processed is reported and
        # skipped, and does not count towards the per-class cap.
        try:
            # Extract and save the video thumbnail
            thumbnail_path = get_video_thumbnail(video_full_path, thumbnail_dir)

            # Process video and extract embedding
            video = VideoFactory.make_from_file_on_disk(video_full_path)
            embedding = vid_vec_clip_operator.run(video)
            # Only the first item produced by the operator is used.
            average_vector = next(embedding)

            # Store video path and embedding
            operator_parameters.append(
                {
                    "payload": video_full_path,
                    "embedding": average_vector.get("vid_vec"),
                    "class": class_dir,  # Store class name for later analysis
                }
            )

            video_count += 1
        except Exception as e:
            print(f"Error processing {video_full_path}: {e}")

print(f"Successfully processed {len(operator_parameters)} videos")
Processing videos and extracting embeddings...
Successfully processed 50 videos

Clustering Videos

# Apply clustering to the video embeddings
print("Clustering video embeddings...")
n_clusters = 10  # Number of clusters to create
clusters = cluster_operator.run(
    operator_parameters, n_clusters=n_clusters, modality="video"
)

# Sort clusters by label for consistent display
clusters = {key: clusters[key] for key in sorted(clusters)}
Clustering video embeddings...

Visualizing Clusters

# Function to load thumbnails
def load_thumbnail(payload):
    """Load the thumbnail from the pre-saved thumbnail folder.

    The thumbnail is looked up as ``<video name without extension>_thumbnail.jpg``
    inside ``thumbnail_dir``. For ``.avi`` files this is the same name as
    replacing ``.avi`` with ``_thumbnail.jpg``.

    Args:
        payload: Path to the video file

    Returns:
        Loaded thumbnail image or None if not found

    """
    video_filename = os.path.basename(payload)
    # Use the stem so videos with extensions other than ".avi" resolve too.
    stem = os.path.splitext(video_filename)[0]
    thumbnail_path = os.path.join(thumbnail_dir, f"{stem}_thumbnail.jpg")

    if os.path.exists(thumbnail_path):
        return cv2.imread(thumbnail_path)
    print(f"Thumbnail not found for {video_filename}")
    return None
# Draw one figure per cluster, with one panel per video in that cluster.
for cluster_label, video_paths in clusters.items():
    num_videos = len(video_paths)

    # squeeze=False always yields a 2-D array of axes, so a cluster with a
    # single video needs no special casing; take the only row.
    fig, axes = plt.subplots(
        1, max(1, num_videos), figsize=(20, 5), squeeze=False
    )
    axes = axes[0]

    # Hide ticks and frames on every panel up front, so a panel whose
    # thumbnail is missing stays blank.
    for ax in axes:
        ax.axis("off")

    # Plot thumbnails
    for ax, video_path in zip(axes, video_paths):
        # Extract class name from path for display
        class_name = os.path.basename(os.path.dirname(video_path))
        ax.set_title(f"Class: {class_name}", fontsize=10)

        video_thumbnail = load_thumbnail(video_path)
        if video_thumbnail is None:
            continue

        # Resize, then convert from OpenCV's BGR channel order to the RGB
        # order expected by imshow.
        video_thumbnail = cv2.cvtColor(
            cv2.resize(video_thumbnail, (160, 120)), cv2.COLOR_BGR2RGB
        )
        ax.imshow(video_thumbnail)

    # Set overall title for the cluster
    fig.suptitle(f"Cluster {cluster_label}", fontsize=16)
    fig.tight_layout()
    # Leave room at the top so the figure title does not overlap the panels.
    fig.subplots_adjust(top=0.85)
    plt.show()
../_images/f873cc1d96c4b9f051d014939ca873026e2d1d1905aa69f8e081a387d5758a6b.png ../_images/b99d156d8c0b64fc6beee92763d747898129beeb628209691a00d758a30b1648.png ../_images/d845f3e1db55e6c2df1932fa5a47fe7e07335ac5da7ad6e8ed312dce79ff423f.png ../_images/f142223d13a0a89420b00184ead6dc1bb6ee3325c73c1c82ef9011901312f6c7.png ../_images/8741492b0cc2a8b926064a4bfc0cde51e9f7cc1409ee38d5157a824fa28c022c.png ../_images/995bda896d4193aa66848d5160b5ef8e168ec0b9a02fe23a9f57a3b0b4ba3788.png ../_images/092e32128750df4b8df9d5ee0556b52c95d9df47465a96c5ac27688df540b5c0.png ../_images/19fa881f3eb0db7f192c03b59ab227c94a1c6cdaab38535c96a71dd6f9e5e2de.png ../_images/aed199e94fdef5b47b71beec4c909a6a0a0f2f686a79c83beb8abe09d3c5eaae.png ../_images/335adce842bd618f7a30c53516fae46f48c1ced09f7a459dc4a2b6e1f6daee95.png
import shutil

# Remove thumbnail directory (and every thumbnail written into it)
if os.path.exists(thumbnail_dir):
    shutil.rmtree(thumbnail_dir)
    print(f"Removed {thumbnail_dir}")

# Remove dataset directory created by extracting the archive
if os.path.exists("UCF101_subset"):
    shutil.rmtree("UCF101_subset")
    print("Removed UCF101_subset")

# Remove downloaded dataset file
# NOTE(review): file_path is the path returned by hf_hub_download; if that
# path is a link into a download cache, this may remove only the link.
# Confirm against the huggingface_hub cache layout.
if os.path.exists(file_path):
    os.remove(file_path)
    print("Removed dataset")


# Let both operators run their own cleanup routines.
cluster_operator.cleanup()
vid_vec_clip_operator.cleanup()
Removed thumbnails
Removed UCF101_subset
Removed dataset