Replaces Enterprise's account_invoice_extract with a Fusion-native pipeline:

Stage 1 (text extraction): Tesseract OCRs the bill attachment via pytesseract + pdf2image. A pluggable OCRProvider adapter pattern allows future Mindee / Google Document AI / Ollama-vision backends.

Stage 2 (field parsing): The fusion_accounting_ai LLMProvider reads the raw OCR text and returns structured invoice fields (vendor, invoice number, dates, amounts, line items) as JSON.

Draft invoice fields are auto-populated for empty-only fields (never overwriting user-entered data). Vendor matching is done by name against res.partner with supplier_rank > 0.

Adds:
- account.move.ocr_state (selection: not_requested/pending/processing/done/failed/manual)
- account.move.ocr_raw_text, ocr_extracted_data (Json), ocr_backend, ocr_confidence
- fusion.ocr.log (audit trail per OCR run)
- res.company.fusion_ocr_enabled / fusion_ocr_default_backend / auto_run
- /fusion/ocr/request_for_invoice JSON-RPC endpoint

Backend availability is detected at runtime via OCRProvider.is_available() classmethods. Tesseract 5.3.4 + pytesseract 0.3.13 + pdf2image 1.17.0 are installed in the container.

Tests: 13 (TesseractAdapter availability + image OCR; flow tests for draft autofill, no-attachment guard, customer-invoice guard, ref-not-overwritten; field parser empty/clean-json/markdown-fence/bad-JSON/provider-exception). All pass on westin-v19 OrbStack VM.

Made-with: Cursor
75 lines
3.1 KiB
Python
75 lines
3.1 KiB
Python
from unittest.mock import MagicMock
|
|
|
|
from odoo.tests import tagged
|
|
from odoo.tests.common import TransactionCase
|
|
|
|
from odoo.addons.fusion_accounting_ocr.services.invoice_field_parser import (
|
|
parse_invoice_fields,
|
|
)
|
|
|
|
|
|
@tagged('post_install', '-at_install')
class TestFieldParser(TransactionCase):
    """Tests for ``parse_invoice_fields`` using stubbed LLM providers."""

    def _parse_with_reply(self, reply_text):
        """Parse placeholder OCR text with a provider stub.

        The stub's ``complete`` call returns ``{'content': reply_text}``.
        """
        stub = MagicMock()
        stub.complete.return_value = {'content': reply_text}
        return parse_invoice_fields(self.env, 'raw text', provider=stub)

    def _assert_blank_result(self, parsed):
        """Check that ``parsed`` has no total and an empty line-item list."""
        self.assertIsNone(parsed['total'])
        self.assertEqual(parsed['line_items'], [])

    def test_parser_handles_empty_text(self):
        """Empty OCR text yields a result with no total and no line items."""
        self._assert_blank_result(parse_invoice_fields(self.env, ''))

    def test_parser_handles_no_provider_gracefully(self):
        """Parsing without an explicit provider still returns a result dict."""
        # Without an LLM provider configured, parse should return an empty
        # result dict rather than crashing.
        parsed = parse_invoice_fields(self.env, 'INVOICE 12345 Total $100')
        for expected_key in ('total', 'line_items'):
            self.assertIn(expected_key, parsed)
        self.assertIsInstance(parsed['line_items'], list)

    def test_parser_consumes_clean_json(self):
        """A plain JSON reply is decoded into the matching result fields."""
        reply = (
            '{"vendor_name": "Acme Co", "invoice_number": "INV-1",'
            ' "invoice_date": "2026-04-20", "due_date": null,'
            ' "currency": "CAD", "subtotal": 90.0, "tax_total": 10.0,'
            ' "total": 100.0, "line_items": ['
            '{"description": "Widget", "quantity": 1, "unit_price": 90.0,'
            ' "amount": 90.0}]}'
        )
        parsed = self._parse_with_reply(reply)
        self.assertEqual(parsed['vendor_name'], 'Acme Co')
        self.assertEqual(parsed['invoice_number'], 'INV-1')
        self.assertEqual(parsed['total'], 100.0)
        items = parsed['line_items']
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0]['description'], 'Widget')

    def test_parser_strips_markdown_fences(self):
        """A JSON reply wrapped in a ```json fence is still decoded."""
        reply = (
            '```json\n'
            '{"vendor_name": "Beta Ltd", "invoice_number": "B-2",'
            ' "invoice_date": null, "due_date": null, "currency": null,'
            ' "subtotal": null, "tax_total": null, "total": 5.5,'
            ' "line_items": []}\n'
            '```'
        )
        parsed = self._parse_with_reply(reply)
        self.assertEqual(parsed['vendor_name'], 'Beta Ltd')
        self.assertEqual(parsed['total'], 5.5)

    def test_parser_returns_empty_on_invalid_json(self):
        """A non-JSON reply yields a result with no total and no line items."""
        self._assert_blank_result(self._parse_with_reply('not json at all'))

    def test_parser_returns_empty_on_provider_exception(self):
        """A provider that raises yields a result with no total or items."""
        failing = MagicMock()
        failing.complete.side_effect = RuntimeError('boom')
        parsed = parse_invoice_fields(
            self.env, 'raw text', provider=failing,
        )
        self._assert_blank_result(parsed)
|