Split 49 modules/suites into independent git repos; untrack from monorepo
Each top-level module/suite folder is now its own private repo on GitHub (gsinghpal/<name>) and gitea (admin/<name>), with a fresh single initial commit. The monorepo no longer tracks them (added to .gitignore + git rm --cached); working-tree files are retained on disk and managed in their own repos. The monorepo keeps shared root files (CLAUDE.md, docs/, scripts/, tools/, AGENTS.md, WIP/obsolete dirs) and full history. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,40 +0,0 @@
|
||||
"""OCRProvider contract - every backend must conform.
|
||||
|
||||
Mirrors the LLMProvider pattern in fusion_accounting_ai. Future adapters
|
||||
(Mindee, Google Document AI, Ollama-vision) drop in alongside the default
|
||||
tesseract adapter without touching account.move.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
|
||||
class OCRResult:
|
||||
raw_text: str = ''
|
||||
confidence: float = 0.0 # 0.0–1.0
|
||||
pages: int = 0
|
||||
backend: str = ''
|
||||
error: str = ''
|
||||
metadata: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
class OCRProvider(ABC):
|
||||
"""Abstract OCR backend. Subclasses implement extract()."""
|
||||
|
||||
name: str = 'base'
|
||||
|
||||
@abstractmethod
|
||||
def extract(self, image_or_pdf_bytes: bytes, *, mimetype: str = 'application/pdf') -> OCRResult:
|
||||
"""Extract text from raw bytes.
|
||||
|
||||
``mimetype`` hints whether to PDF-render (poppler) or image-decode
|
||||
(PIL) the bytes. Implementations should still inspect the byte
|
||||
signature for safety.
|
||||
"""
|
||||
...
|
||||
|
||||
@classmethod
|
||||
def is_available(cls) -> bool:
|
||||
"""Return True if the backend's runtime deps are present."""
|
||||
return True
|
||||
Reference in New Issue
Block a user