Video Embedding Clustering with Feluda

This notebook demonstrates how to use the feluda to extract video embeddings and cluster them into N clusters or auto clusters. It includes:

Setting up Feluda and its operators.
Using video data from a subset of the UCF101 video dataset dataset. We are using this for demo, but can be replaced with any video dataset.
Generating video embeddings using feluda CLIP video operator.
Using Feluda’s clustering operator to cluster vidoes.
Visualizing the clusters with video thumbnails.

Install dependencies conditionally based on whether the notebook is running in Colab or locally.

%%time
import sys

IN_COLAB = "google.colab" in sys.modules
print("Running Notebook in Google Colab" if IN_COLAB else "Running Notebook locally")

if IN_COLAB:
    # Since Google Colab has preinstalled libraries like tensorflow and numba, we create a folder called feluda_custom_venv and isolate the environment there.
    # This is done to avoid any conflicts with the preinstalled libraries.
    %pip install uv
    !mkdir -p /content/feluda_custom_venv
    !uv pip install --target=/content/feluda_custom_venv --prerelease allow feluda feluda-vid-vec-rep-clip feluda-cluster-embeddings opencv-python matplotlib > /dev/null 2>&1

    sys.path.insert(0, "/content/feluda_custom_venv")
else:
    !uv pip install feluda feluda-vid-vec-rep-clip feluda-cluster-embeddings opencv-python matplotlib > /dev/null 2>&1

Running Notebook locally
Using Python 3.10.12 environment at: /home/aatman/Aatman/Tattle/feluda/.venv
Audited 6 packages in 11ms
CPU times: user 6.38 ms, sys: 4.13 ms, total: 10.5 ms
Wall time: 138 ms

import os
import tarfile
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
from huggingface_hub import hf_hub_download
from tqdm.notebook import tqdm

from feluda.factory import VideoFactory

We’ll use two operators for this example. One for extracting embeddings and other for clustering.

from feluda.operators import ClusterEmbeddings, VidVecRep

cluster_operator = ClusterEmbeddings()
vid_vec_clip_operator = VidVecRep()

Data Preparation

dataset_name = "UCF101_subset/train"
hf_dataset_identifier = "sayakpaul/ucf101-subset"
filename = "UCF101_subset.tar.gz"

# Download and extract the UCF101 subset dataset
print("Downloading and extracting dataset...")
file_path = hf_hub_download(
    repo_id=hf_dataset_identifier, filename=filename, repo_type="dataset"
)

with tarfile.open(file_path) as t:
    t.extractall(".")
print(f"Dataset extracted to {dataset_name}")

Downloading and extracting dataset...

Dataset extracted to UCF101_subset/train

/var/folders/4p/bw6h5x8x1nb_17vsgfc12dz00000gn/T/ipykernel_40587/954250746.py:12: DeprecationWarning: Python 3.14 will, by default, filter extracted tar archives and reject files or modify their metadata. Use the filter argument to control this behavior.
  t.extractall(".")

# Create thumbnails directory
thumbnail_dir = Path("thumbnails")
thumbnail_dir.mkdir(exist_ok=True)

def get_video_thumbnail(video_path: str, save_path: str) -> str | None:
    """Extract and save the first frame from the video as a thumbnail.

    Args:
        video_path: Path to the video file
        save_path: Directory to save the thumbnail

    Returns:
        Path to the saved thumbnail or None if extraction failed

    """
    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()  # Read the first frame

    if ret:
        thumbnail_path = os.path.join(
            save_path, os.path.basename(video_path).replace(".avi", "_thumbnail.jpg")
        )
        cv2.imwrite(thumbnail_path, frame)  # Save the thumbnail as a JPEG
        cap.release()
        return thumbnail_path  # Return the path to the saved thumbnail

    cap.release()
    return None

Video Processing and Embedding Extraction

print("Processing videos and extracting embeddings...")
operator_parameters = []
sub_folder_name = os.listdir(f"{dataset_name}")

# Process videos from each class directory
for class_dir in tqdm(sub_folder_name, desc="Processing classes"):
    temp_list = os.listdir(os.path.join(dataset_name, class_dir))

    # Process up to 5 videos per class
    video_count = 0
    for video_file in temp_list:
        if video_file == "UCF101" or video_count >= 5:
            continue

        video_full_path = os.path.join(dataset_name, class_dir, video_file)

        try:
            # Extract and save the video thumbnail
            thumbnail_path = get_video_thumbnail(video_full_path, thumbnail_dir)

            # Process video and extract embedding
            video = VideoFactory.make_from_file_on_disk(video_full_path)
            embedding = vid_vec_clip_operator.run(video)
            average_vector = next(embedding)

            # Store video path and embedding
            operator_parameters.append(
                {
                    "payload": video_full_path,
                    "embedding": average_vector.get("vid_vec"),
                    "class": class_dir,  # Store class name for later analysis
                }
            )

            video_count += 1
        except Exception as e:
            print(f"Error processing {video_full_path}: {e}")
            continue

print(f"Successfully processed {len(operator_parameters)} videos")

Processing videos and extracting embeddings...

Successfully processed 50 videos

Clustering Videos

# Apply clustering to the video embeddings
print("Clustering video embeddings...")
n_clusters = 10  # Number of clusters to create
clusters = cluster_operator.run(
    operator_parameters, n_clusters=n_clusters, modality="video"
)

# Sort clusters by label for consistent display
clusters = {key: clusters[key] for key in sorted(clusters)}

Clustering video embeddings...

Visualizing Clusters

# Function to load thumbnails
def load_thumbnail(payload):
    """Load the thumbnail from the pre-saved thumbnail folder.

    Args:
        payload: Path to the video file

    Returns:
        Loaded thumbnail image or None if not found

    """
    video_filename = os.path.basename(payload)
    thumbnail_filename = video_filename.replace(".avi", "_thumbnail.jpg")
    thumbnail_path = os.path.join(thumbnail_dir, thumbnail_filename)

    if os.path.exists(thumbnail_path):
        return cv2.imread(thumbnail_path)
    print(f"Thumbnail not found for {video_filename}")
    return None

for cluster_label, video_paths in clusters.items():
    num_videos = len(video_paths)

    # Create a subplot for each video in the cluster
    fig, axes = plt.subplots(1, max(1, num_videos), figsize=(20, 5))

    # Handle case where there's only one video in a cluster
    if num_videos == 1:
        axes = [axes]

    # Plot thumbnails
    for i, video_path in enumerate(video_paths):
        # Extract class name from path for display
        class_name = os.path.basename(os.path.dirname(video_path))

        video_thumbnail = load_thumbnail(video_path)
        if video_thumbnail is not None:
            # Resize and convert color format
            video_thumbnail = cv2.cvtColor(
                cv2.resize(video_thumbnail, (160, 120)), cv2.COLOR_BGR2RGB
            )

            # Display thumbnail
            axes[i].imshow(video_thumbnail)
            axes[i].set_title(f"Class: {class_name}", fontsize=10)
            axes[i].axis("off")

    # Set overall title for the cluster
    plt.suptitle(f"Cluster {cluster_label}", fontsize=16)
    plt.tight_layout()
    plt.subplots_adjust(top=0.85)
    plt.show()

../_images/f873cc1d96c4b9f051d014939ca873026e2d1d1905aa69f8e081a387d5758a6b.png

../_images/b99d156d8c0b64fc6beee92763d747898129beeb628209691a00d758a30b1648.png

../_images/d845f3e1db55e6c2df1932fa5a47fe7e07335ac5da7ad6e8ed312dce79ff423f.png

../_images/f142223d13a0a89420b00184ead6dc1bb6ee3325c73c1c82ef9011901312f6c7.png

../_images/8741492b0cc2a8b926064a4bfc0cde51e9f7cc1409ee38d5157a824fa28c022c.png

../_images/995bda896d4193aa66848d5160b5ef8e168ec0b9a02fe23a9f57a3b0b4ba3788.png

../_images/092e32128750df4b8df9d5ee0556b52c95d9df47465a96c5ac27688df540b5c0.png

../_images/19fa881f3eb0db7f192c03b59ab227c94a1c6cdaab38535c96a71dd6f9e5e2de.png

../_images/aed199e94fdef5b47b71beec4c909a6a0a0f2f686a79c83beb8abe09d3c5eaae.png

../_images/335adce842bd618f7a30c53516fae46f48c1ced09f7a459dc4a2b6e1f6daee95.png

import shutil

# Remove thumbnail directory
if os.path.exists(thumbnail_dir):
    shutil.rmtree(thumbnail_dir)
    print(f"Removed {thumbnail_dir}")

# Remove dataset directory
if os.path.exists("UCF101_subset"):
    shutil.rmtree("UCF101_subset")
    print("Removed UCF101_subset")

# Remove downloaded dataset file
if os.path.exists(file_path):
    os.remove(file_path)
    print("Removed dataset")


cluster_operator.cleanup()
vid_vec_clip_operator.cleanup()

Removed thumbnails
Removed UCF101_subset
Removed dataset