import contextlib
import gc
import os
import shutil
import subprocess
import tempfile
from typing import Generator
import torch
from PIL import Image
from transformers import AutoProcessor, CLIPModel
from feluda import Operator
from feluda.factory import VideoFactory
class VidVecRep(Operator):
"""Operator to extract video vector representations using CLIP-ViT-B-32."""
def __init__(self) -> None:
"""Initialize the `VidVecRep` class."""
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.model = None
self.processor = None
self.frame_images = []
self.feature_matrix = None
self.load_model()
self.validate_system()
def load_model(self) -> None:
"""Load the CLIP model and processor onto the specified device."""
try:
self.processor = AutoProcessor.from_pretrained(
"openai/clip-vit-base-patch32"
)
self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
except Exception as e:
raise RuntimeError(
f"Failed to load the CLIP model or processor: {e!s} "
) from e
self.model.to(self.device)
@staticmethod
def validate_system() -> None:
"""Validate that required system dependencies are available.
Checks if FFmpeg is installed and accessible in the system PATH.
"""
if shutil.which("ffmpeg") is None:
raise RuntimeError(
"FFmpeg is not installed or not found in system PATH. "
"Please install FFmpeg to use this operator."
)
def get_mean_feature(self) -> torch.Tensor:
"""Compute the mean feature vector from the feature matrix.
Returns:
torch.Tensor: Mean feature vector
"""
if self.feature_matrix is None or len(self.feature_matrix) == 0:
raise ValueError("Feature matrix is empty. Please analyze a video first.")
return torch.mean(self.feature_matrix, dim=0)
def analyze(self, fname: str) -> None:
"""Analyze the video file and extract features.
Args:
fname (str): Path to the video file
"""
self.frame_images = self.extract_frames(fname)
if not self.frame_images:
raise ValueError(f"No frames could be extracted from: {fname!s}")
self.feature_matrix = self.extract_features(self.frame_images)
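# Note: extract_frames() and extract_features() are defined elsewhere in the
# operator and are not shown in this listing. Based on the FFmpeg check, the
# PIL import, and how the result is consumed, extract_frames() presumably
# returns the video's keyframes as PIL images, and extract_features() returns
# a torch.Tensor of per-frame CLIP embeddings.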
def gendata(self) -> Generator[dict, None, None]:
"""Yield video vector representations from the `VidVecRep` prototype.
Yields:
dict: A dictionary containing:
- `vid_vec` (list): Vector representation
- `is_avg` (bool): A flag indicating whether the vector is the average vector or an I-frame vector
"""
if self.feature_matrix is None or len(self.feature_matrix) == 0:
raise ValueError("Feature matrix is empty. Please analyze a video first.")
# average vector
yield {
"vid_vec": self.get_mean_feature().tolist(),
"is_avg": True,
}
# I-frame vectors
for keyframe in self.feature_matrix:
yield {
"vid_vec": keyframe.tolist(),
"is_avg": False,
}
def run(
self, file: VideoFactory, remove_after_processing: bool = False
) -> Generator[dict, None, None]:
"""Run the operator.
Args:
file (VideoFactory): `VideoFactory` file object (a dict with a "path" key)
remove_after_processing (bool): Whether to remove the file after processing
Returns:
Generator[dict, None, None]: Yields the average vector followed by the per-I-frame vectors
"""
if not isinstance(file, dict) or "path" not in file:
raise ValueError(
"Invalid file object. Expected VideoFactory object with 'path' key."
)
fname = file["path"]
if not os.path.exists(fname):
raise FileNotFoundError(f"File not found: {fname}")
try:
self.analyze(fname)
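# analyze() loads all frame features into memory, so gendata() does not read
# the file again; deleting the file in the finally block below does not
# invalidate the returned generator.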
return self.gendata()
finally:
if remove_after_processing:
with contextlib.suppress(FileNotFoundError):
os.remove(fname)
def cleanup(self) -> None:
"""Cleans up resources used by the operator."""
del self.model
del self.processor
self.frame_images.clear()
self.feature_matrix = None
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
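# cleanup() deletes the model and processor attributes, so call load_model()
# (or create a new VidVecRep instance) before running further analyses.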
def state(self) -> dict:
"""Returns the current state of the operator.
Returns:
dict: State of the operator
"""
return {
"device": self.device,
"model": self.model,
"processor": self.processor,
"frame_images": self.frame_images.copy(),
"feature_matrix": self.feature_matrix.clone().cpu().tolist()
if self.feature_matrix is not None
else [],
}
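# A minimal usage sketch, not part of the operator itself. It assumes the
# `VideoFactory` file object behaves like a dict with a "path" key (which is
# what run() validates) and that "sample.mp4" is a hypothetical local video file.
if __name__ == "__main__":
    operator = VidVecRep()
    for item in operator.run({"path": "sample.mp4"}):
        kind = "average" if item["is_avg"] else "I-frame"
        print(f"{kind} vector of length {len(item['vid_vec'])}")
    operator.cleanup()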