# Odoo-Modules/fusion_accounting_ocr/services/ocr_providers/base.py

"""OCRProvider contract - every backend must conform.

Mirrors the LLMProvider pattern in fusion_accounting_ai. Future adapters
(Mindee, Google Document AI, Ollama-vision) drop in alongside the default
tesseract adapter without touching account.move.
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field


@dataclass
class OCRResult:
    """Value object returned by ``OCRProvider.extract()``.

    Every field has a default, so ``OCRResult()`` is a valid empty result.
    Field order is part of the positional constructor signature and must
    not be rearranged.
    """

    # Text recovered from the document; empty string by default.
    raw_text: str = ""
    # Score in the closed range 0.0–1.0.
    confidence: float = 0.0
    # Page count; presumably the number of pages processed - confirm with adapters.
    pages: int = 0
    # Presumably the ``name`` of the provider that produced this result - confirm.
    backend: str = ""
    # Presumably a failure description, empty when there was none - confirm.
    error: str = ""
    # Free-form extras; built per instance so results never share one dict.
    metadata: dict = field(default_factory=dict)


class OCRProvider(ABC):
    """Interface that every OCR backend adapter has to implement.

    A concrete adapter subclasses this and provides ``extract()``; the class
    cannot be instantiated until that method is overridden.
    """

    # Identifier of the backend; the abstract base uses the placeholder 'base'.
    name: str = "base"

    @classmethod
    def is_available(cls) -> bool:
        """Report whether the backend's runtime dependencies are present.

        The base implementation performs no check and always returns True.
        """
        return True

    @abstractmethod
    def extract(
        self,
        image_or_pdf_bytes: bytes,
        *,
        mimetype: str = "application/pdf",
    ) -> OCRResult:
        """Extract text from the raw bytes of an image or PDF.

        ``mimetype`` is keyword-only and acts as a hint for whether the
        bytes should be PDF-rendered (poppler) or image-decoded (PIL).
        Implementations should still inspect the byte signature for safety
        rather than trusting the hint alone.
        """