feat(fusion_accounting_ocr): pluggable OCR for vendor bills
Replaces Enterprise's account_invoice_extract with a Fusion-native pipeline:

Stage 1 (text extraction): Tesseract OCRs the bill attachment via pytesseract + pdf2image. A pluggable OCRProvider adapter pattern allows future Mindee / Google Document AI / Ollama-vision backends.

Stage 2 (field parsing): The fusion_accounting_ai LLMProvider reads the raw OCR text and returns structured invoice fields (vendor, invoice number, dates, amounts, line items) as JSON.

Draft invoice fields are auto-populated only where they are still empty (user-entered data is never overwritten). Vendors are matched by name against res.partner records with supplier_rank > 0.

Adds:
- account.move.ocr_state (selection: not_requested/pending/processing/done/failed/manual)
- account.move.ocr_raw_text, ocr_extracted_data (Json), ocr_backend, ocr_confidence
- fusion.ocr.log (audit trail per OCR run)
- res.company.fusion_ocr_enabled / fusion_ocr_default_backend / auto_run
- /fusion/ocr/request_for_invoice JSON-RPC endpoint

Backend availability is detected at runtime via the OCRProvider.is_available() classmethods. Tesseract 5.3.4 + pytesseract 0.3.13 + pdf2image 1.17.0 are installed in the container.

Tests: 13 (TesseractAdapter availability + image OCR; flow tests for draft autofill, no-attachment guard, customer-invoice guard, ref-not-overwritten; field parser empty/clean-json/markdown-fence/bad-JSON/provider-exception). All pass on the westin-v19 OrbStack VM.

Made-with: Cursor
This commit is contained in:
150
fusion_accounting_ocr/services/invoice_field_parser.py
Normal file
150
fusion_accounting_ocr/services/invoice_field_parser.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""Stage-2 of the OCR pipeline: parse raw OCR text into structured invoice
|
||||
fields via the configured LLM provider.
|
||||
|
||||
Mirrors the pattern in fusion_accounting_followup/services/followup_text_generator.py:
|
||||
look up an adapter by ir.config_parameter, fall back gracefully when no
|
||||
provider is configured, and never let an LLM hiccup nuke the OCR result.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# System message sent with every field-extraction request. It pins the model
# to a JSON-only reply and tells it to use null for anything it cannot read.
SYSTEM_PROMPT = (
    "You are an invoice field extraction assistant. "
    "You read raw OCR text from vendor bills and return a strict JSON "
    "object with the requested fields. "
    "You never include commentary or markdown fences. "
    "When a field cannot be determined from the text you return null "
    "for that field."
)
|
||||
|
||||
# Template for the user message. ``{text}`` is filled in through
# ``str.format``, which is why every literal JSON brace below is doubled.
USER_PROMPT = """Given the raw OCR text of a vendor bill, return a JSON object
with these fields (use null when unclear):

{{
  "vendor_name": <string, the seller/vendor company name>,
  "invoice_number": <string, the bill or invoice reference number>,
  "invoice_date": <string, ISO format YYYY-MM-DD>,
  "due_date": <string or null, ISO format YYYY-MM-DD>,
  "currency": <string, ISO 4217 code like CAD/USD/EUR>,
  "subtotal": <number or null>,
  "tax_total": <number or null>,
  "total": <number, the grand total amount due>,
  "line_items": [
    {{"description": <string>, "quantity": <number or null>,
      "unit_price": <number or null>, "amount": <number or null>}}
  ]
}}

Return ONLY valid JSON, no commentary, no markdown fences.

Raw OCR text:
---
{text}
---
"""
|
||||
|
||||
|
||||
def parse_invoice_fields(env, raw_text: str, *, provider=None) -> dict:
    """Use the configured LLM provider to extract structured invoice fields.

    Args:
        env: Odoo environment; only used to resolve a provider through
            ``_get_provider`` when ``provider`` is not supplied.
        raw_text: Raw OCR text of the vendor bill. Only the first 12000
            characters are sent to the provider.
        provider: Optional ready-made adapter exposing
            ``complete(system=, messages=, max_tokens=, temperature=)``.
            Its reply may be the text itself or a dict with a ``content``
            key.

    Returns:
        A dict with exactly the keys of ``_empty_result()``. On any failure
        (blank text, no provider, empty reply, bad JSON, JSON that is not an
        object, provider error, etc.) every field is null and ``line_items``
        is empty, so the OCR raw text is still preserved for the AP user.
        ``line_items`` is always a list of dicts.
    """
    if not raw_text or not raw_text.strip():
        return _empty_result()

    if provider is None:
        provider = _get_provider(env)
    if provider is None:
        _logger.info(
            "No LLM provider configured for OCR field parsing; "
            "raw OCR text preserved, fields left empty."
        )
        return _empty_result()

    try:
        # Cap the prompt so a very long OCR dump cannot blow the request size.
        truncated = raw_text[:12000]
        user = USER_PROMPT.format(text=truncated)
        response = provider.complete(
            system=SYSTEM_PROMPT,
            messages=[{'role': 'user', 'content': user}],
            max_tokens=1000,
            temperature=0.1,
        )
        content = response.get('content') if isinstance(response, dict) else response
        if not content:
            return _empty_result()
        parsed = _load_json_reply(content)
    except json.JSONDecodeError as e:
        _logger.warning("LLM returned non-JSON for OCR field parsing: %s", e)
        return _empty_result()
    except Exception as e:
        # Deliberately broad: a provider/network failure must never take the
        # OCR run down with it.
        _logger.warning("OCR field parsing failed: %s", e)
        return _empty_result()

    if not isinstance(parsed, dict):
        # Valid JSON, but an array/scalar carries no named fields to map.
        _logger.warning(
            "LLM returned a JSON %s instead of an object for OCR field parsing",
            type(parsed).__name__,
        )
        return _empty_result()

    # Start from the canonical empty schema so both functions always agree on
    # the key set; unknown keys in the reply are dropped.
    fields = _empty_result()
    for key in fields:
        if key != 'line_items':
            fields[key] = parsed.get(key)
    items = parsed.get('line_items')
    if isinstance(items, list):
        # Consumers read each line as a mapping; discard anything else.
        fields['line_items'] = [item for item in items if isinstance(item, dict)]
    return fields


def _load_json_reply(content):
    """Decode the JSON carried by an LLM reply, tolerating fences and prose.

    Raises ``json.JSONDecodeError`` when no decodable JSON can be found.
    """
    text = content.strip()
    # LLMs sometimes wrap JSON in ```json ... ``` despite instructions.
    if text.startswith('```'):
        # Keep what sits between the opening fence and the next fence (or the
        # end of the reply when the closing fence is missing).
        text = text.split('```', 2)[1]
        if text[:4].lower() == 'json':
            text = text[4:]
        text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Second chance: the object may be surrounded by commentary.
        start, end = text.find('{'), text.rfind('}')
        if start == -1 or end <= start:
            raise
        return json.loads(text[start:end + 1])
|
||||
|
||||
|
||||
def _empty_result():
|
||||
return {
|
||||
'vendor_name': None,
|
||||
'invoice_number': None,
|
||||
'invoice_date': None,
|
||||
'due_date': None,
|
||||
'currency': None,
|
||||
'subtotal': None,
|
||||
'tax_total': None,
|
||||
'total': None,
|
||||
'line_items': [],
|
||||
}
|
||||
|
||||
|
||||
def _get_provider(env):
|
||||
"""Look up the LLM adapter via ir.config_parameter.
|
||||
|
||||
Honours a feature-specific override
|
||||
(``fusion_accounting.provider.ocr_field_parsing``) before falling back
|
||||
to the suite-wide default (``fusion_accounting.provider.default``).
|
||||
Returns None when no adapter is configured/importable.
|
||||
"""
|
||||
param = env['ir.config_parameter'].sudo()
|
||||
name = param.get_param('fusion_accounting.provider.ocr_field_parsing')
|
||||
if not name:
|
||||
name = param.get_param('fusion_accounting.provider.default')
|
||||
if not name:
|
||||
return None
|
||||
try:
|
||||
from odoo.addons.fusion_accounting_ai.services.adapters.openai_adapter import OpenAIAdapter
|
||||
from odoo.addons.fusion_accounting_ai.services.adapters.claude import ClaudeAdapter
|
||||
except ImportError:
|
||||
return None
|
||||
try:
|
||||
if name.startswith('openai'):
|
||||
return OpenAIAdapter(env)
|
||||
if name.startswith('claude'):
|
||||
return ClaudeAdapter(env)
|
||||
except Exception as e:
|
||||
_logger.warning("OCR field parser could not instantiate %s: %s", name, e)
|
||||
return None
|
||||
return None
|
||||
Reference in New Issue
Block a user