"""OCRProvider contract - every backend must conform.

Mirrors the LLMProvider pattern in fusion_accounting_ai. Future adapters
(Mindee, Google Document AI, Ollama-vision) drop in alongside the default
tesseract adapter without touching account.move.
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field


@dataclass
class OCRResult:
    """Result record returned by :meth:`OCRProvider.extract`.

    Every field has a default, so ``OCRResult()`` is a valid empty result.
    Field order is part of the positional-constructor interface and must
    not be rearranged.
    """

    # Text produced by the backend.
    raw_text: str = ''
    # Score in the range 0.0–1.0.
    confidence: float = 0.0
    # Page count — presumably the number of pages processed; confirm
    # against the concrete adapters.
    pages: int = 0
    # Presumably the ``name`` of the provider that produced this result.
    backend: str = ''
    # Presumably an error description, empty on success — confirm against
    # the concrete adapters.
    error: str = ''
    # Free-form extra data. A default_factory gives each instance its own
    # dict instead of one shared mutable default.
    metadata: dict = field(default_factory=dict)


class OCRProvider(ABC):
    """Abstract OCR backend. Subclasses implement extract()."""

    # Identifier for the backend; the abstract base uses 'base'.
    name: str = 'base'

    @abstractmethod
    def extract(self, image_or_pdf_bytes: bytes, *,
                mimetype: str = 'application/pdf') -> OCRResult:
        """Extract text from raw bytes.

        ``mimetype`` hints whether to PDF-render (poppler) or image-decode
        (PIL) the bytes. Implementations should still inspect the byte
        signature for safety.

        Args:
            image_or_pdf_bytes: Raw content of the document or image.
            mimetype: Keyword-only hint about the content type; defaults
                to ``'application/pdf'``.

        Returns:
            An :class:`OCRResult`. The abstract body itself does nothing
            and returns ``None``; concrete subclasses supply the result.
        """
        ...

    @classmethod
    def is_available(cls) -> bool:
        """Return True if the backend's runtime deps are present.

        The base implementation returns ``True`` unconditionally;
        subclasses can override it to probe for their dependencies.
        """
        return True