Source code for operators.detect_text_in_image.detect_text_in_image

import gc
import os
import shutil
from typing import Any, Optional

import pytesseract
from PIL import Image

from feluda import Operator
from feluda.factory import ImageFactory


class DetectTextInImage(Operator):
    """Operator to detect text in images using Tesseract OCR.

    Every OCR call requests the English, Hindi, Tamil and Telugu language
    packs together (see ``_REQUIRED_LANGS``).
    """

    # Tesseract language codes checked at start-up and requested on every
    # OCR call. Kept in one place so the validation and the OCR call cannot
    # drift apart.
    _REQUIRED_LANGS = ("eng", "hin", "tam", "tel")

    def __init__(
        self,
        psm: int = 6,
        oem: int = 1,
        tesseract_cmd: Optional[str] = None,
    ) -> None:
        """Initialize the `DetectTextInImage` class.

        Args:
            psm (int): Page segmentation mode for Tesseract (default: 6)
            oem (int): OCR Engine mode for Tesseract (default: 1)
            tesseract_cmd (str | None): Path to the Tesseract executable.
                When omitted (or empty), the executable is looked up on
                PATH with ``shutil.which("tesseract")``.

        Raises:
            RuntimeError: Propagated from `validate_system` when Tesseract
                cannot be found.
        """
        self.psm = psm
        self.oem = oem
        self.tesseract_cmd = tesseract_cmd or shutil.which("tesseract")
        self.validate_system()
        self.validate_languages()

    def validate_system(self) -> None:
        """Validate that Tesseract OCR is installed and accessible.

        Raises:
            RuntimeError: If Tesseract is not installed or not in PATH. The
                original ``TesseractNotFoundError`` is attached as the cause.
        """
        if self.tesseract_cmd:
            # This is a module-level attribute of pytesseract, not
            # per-instance state.
            pytesseract.pytesseract.tesseract_cmd = self.tesseract_cmd
        try:
            pytesseract.get_tesseract_version()
        except pytesseract.TesseractNotFoundError as e:
            # Chain explicitly so the underlying lookup failure stays
            # visible in the traceback as the direct cause.
            raise RuntimeError(
                "Tesseract OCR is not installed or not in PATH. "
                "Please install Tesseract to use this operator."
            ) from e

    def validate_languages(self) -> None:
        """Validate that required language packs are installed.

        Checks for English, Hindi, Tamil, and Telugu language support.
        This check is best-effort: missing packs, or a failure to query
        the installed packs, only print a warning and never raise.
        """
        try:
            installed_langs = pytesseract.get_languages()
            missing_langs = [
                lang for lang in self._REQUIRED_LANGS if lang not in installed_langs
            ]
            if missing_langs:
                print(
                    f"Warning: Some required language packs are not installed: {', '.join(missing_langs)}"
                )
                print("OCR may not work correctly for these languages.")
        except Exception as e:
            # Deliberately broad: a failed language query must not prevent
            # the operator from being constructed.
            print(f"Warning: Could not verify language pack installation: {e}")

    def run(self, file: ImageFactory, remove_after_processing: bool = False) -> str:
        """Run the text detection operator.

        Args:
            file (ImageFactory): ImageFactory object. At runtime it must be
                a dict containing a ``"path"`` key that points at the image.
            remove_after_processing (bool): Whether to remove the file after
                processing. Removal is attempted whether OCR succeeds or
                fails, but not when the input checks below reject the call.

        Returns:
            str: Detected text from the image

        Raises:
            ValueError: If ``file`` is not a dict with a ``"path"`` key.
            FileNotFoundError: If the path does not exist.
            RuntimeError: If opening the image or running OCR fails; the
                original exception is attached as the cause.
        """
        if not isinstance(file, dict) or "path" not in file:
            raise ValueError(
                "Invalid file object. Expected ImageFactory object with 'path' key."
            )

        image_path = file["path"]
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")

        try:
            with Image.open(image_path) as load_image:
                return pytesseract.image_to_string(
                    load_image,
                    # Tesseract takes multiple languages joined by "+".
                    lang="+".join(self._REQUIRED_LANGS),
                    config=f"--psm {self.psm} --oem {self.oem}",
                )
        except Exception as e:
            raise RuntimeError(f"Text detection failed: {e}") from e
        finally:
            if remove_after_processing:
                # A failed delete is reported but does not mask the OCR
                # result or the OCR error.
                try:
                    if os.path.exists(image_path):
                        os.remove(image_path)
                except OSError as e:
                    print(f"Warning: Could not delete file {image_path}: {e}")

    def cleanup(self) -> None:
        """Cleans up resources used by the operator.

        Only triggers a garbage-collection pass.
        """
        gc.collect()

    def state(self) -> dict[str, Any]:
        """Returns the current state of the operator.

        Returns:
            dict: State of the operator including PSM and OEM settings
        """
        return {
            "psm": self.psm,
            "oem": self.oem,
        }