feat(fusion_accounting_ocr): pluggable OCR for vendor bills
Replaces Enterprise's account_invoice_extract with a Fusion-native pipeline:

Stage 1 (text extraction): Tesseract OCRs the bill attachment via pytesseract + pdf2image. A pluggable OCRProvider adapter pattern allows future Mindee / Google Document AI / Ollama-vision backends.

Stage 2 (field parsing): The fusion_accounting_ai LLMProvider reads the raw OCR text and returns structured invoice fields (vendor, invoice number, dates, amounts, line items) as JSON.

Draft invoice fields are auto-populated only when they are empty (user-entered data is never overwritten). Vendors are matched by name against res.partner with supplier_rank > 0.

Adds:
- account.move.ocr_state (selection: not_requested/pending/processing/done/failed/manual)
- account.move.ocr_raw_text, ocr_extracted_data (Json), ocr_backend, ocr_confidence
- fusion.ocr.log (audit trail per OCR run)
- res.company.fusion_ocr_enabled / fusion_ocr_default_backend / auto_run
- /fusion/ocr/request_for_invoice JSON-RPC endpoint

Backend availability is detected at runtime via OCRProvider.is_available() classmethods. Tesseract 5.3.4 + pytesseract 0.3.13 + pdf2image 1.17.0 are installed in the container.

Tests: 13 (TesseractAdapter availability + image OCR; flow tests for draft autofill, no-attachment guard, customer-invoice guard, ref-not-overwritten; field parser empty/clean-json/markdown-fence/bad-JSON/provider-exception). All pass on westin-v19 OrbStack VM.

Made-with: Cursor
This commit is contained in:
3
fusion_accounting_ocr/tests/__init__.py
Normal file
3
fusion_accounting_ocr/tests/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from . import test_tesseract_adapter
|
||||
from . import test_invoice_ocr_flow
|
||||
from . import test_field_parser
|
||||
74
fusion_accounting_ocr/tests/test_field_parser.py
Normal file
74
fusion_accounting_ocr/tests/test_field_parser.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from odoo.tests import tagged
|
||||
from odoo.tests.common import TransactionCase
|
||||
|
||||
from odoo.addons.fusion_accounting_ocr.services.invoice_field_parser import (
|
||||
parse_invoice_fields,
|
||||
)
|
||||
|
||||
|
||||
@tagged('post_install', '-at_install')
class TestFieldParser(TransactionCase):
    """Exercise ``parse_invoice_fields`` with empty input and stubbed providers."""

    @staticmethod
    def _stub_provider(content):
        """Return a mock provider whose ``complete()`` yields *content*."""
        stub = MagicMock()
        stub.complete.return_value = {'content': content}
        return stub

    def _assert_blank_result(self, parsed):
        """Assert that *parsed* carries no total and an empty line-item list."""
        self.assertIsNone(parsed['total'])
        self.assertEqual(parsed['line_items'], [])

    def test_parser_handles_empty_text(self):
        self._assert_blank_result(parse_invoice_fields(self.env, ''))

    def test_parser_handles_no_provider_gracefully(self):
        # With no LLM provider configured the parser is expected to hand
        # back its usual result dict instead of raising.
        parsed = parse_invoice_fields(self.env, 'INVOICE 12345 Total $100')
        for key in ('total', 'line_items'):
            self.assertIn(key, parsed)
        self.assertIsInstance(parsed['line_items'], list)

    def test_parser_consumes_clean_json(self):
        llm_reply = (
            '{"vendor_name": "Acme Co", "invoice_number": "INV-1",'
            ' "invoice_date": "2026-04-20", "due_date": null,'
            ' "currency": "CAD", "subtotal": 90.0, "tax_total": 10.0,'
            ' "total": 100.0, "line_items": ['
            '{"description": "Widget", "quantity": 1, "unit_price": 90.0,'
            ' "amount": 90.0}]}'
        )
        parsed = parse_invoice_fields(
            self.env, 'raw text', provider=self._stub_provider(llm_reply),
        )
        self.assertEqual(parsed['vendor_name'], 'Acme Co')
        self.assertEqual(parsed['invoice_number'], 'INV-1')
        self.assertEqual(parsed['total'], 100.0)
        items = parsed['line_items']
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0]['description'], 'Widget')

    def test_parser_strips_markdown_fences(self):
        # The reply is wrapped in a ```json fence that the parser must drop.
        llm_reply = (
            '```json\n'
            '{"vendor_name": "Beta Ltd", "invoice_number": "B-2",'
            ' "invoice_date": null, "due_date": null, "currency": null,'
            ' "subtotal": null, "tax_total": null, "total": 5.5,'
            ' "line_items": []}\n'
            '```'
        )
        parsed = parse_invoice_fields(
            self.env, 'raw text', provider=self._stub_provider(llm_reply),
        )
        self.assertEqual(parsed['vendor_name'], 'Beta Ltd')
        self.assertEqual(parsed['total'], 5.5)

    def test_parser_returns_empty_on_invalid_json(self):
        garbage = self._stub_provider('not json at all')
        self._assert_blank_result(
            parse_invoice_fields(self.env, 'raw text', provider=garbage)
        )

    def test_parser_returns_empty_on_provider_exception(self):
        failing = MagicMock()
        failing.complete.side_effect = RuntimeError('boom')
        self._assert_blank_result(
            parse_invoice_fields(self.env, 'raw text', provider=failing)
        )
|
||||
117
fusion_accounting_ocr/tests/test_invoice_ocr_flow.py
Normal file
117
fusion_accounting_ocr/tests/test_invoice_ocr_flow.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import base64
|
||||
import io
|
||||
from unittest.mock import patch
|
||||
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
from odoo.exceptions import UserError
|
||||
from odoo.tests import tagged
|
||||
from odoo.tests.common import TransactionCase
|
||||
|
||||
|
||||
@tagged('post_install', '-at_install')
class TestInvoiceOcrFlow(TransactionCase):
    """Flow tests for ``account.move.action_request_ocr`` on vendor bills.

    Each test works on a draft ``in_invoice`` created in ``setUp``. The LLM
    field parser is patched out so no real API roundtrip happens; the OCR
    step itself runs for real on a generated PNG.
    """

    # Dotted path of the parser as imported by the account.move model.
    _PARSER_PATH = (
        'odoo.addons.fusion_accounting_ocr.models.account_move.parse_invoice_fields'
    )
    _BOLD_FONT_PATH = '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf'

    def setUp(self):
        super().setUp()
        self.partner = self.env['res.partner'].create({
            'name': 'Test Vendor',
            'supplier_rank': 1,
        })
        self.move = self.env['account.move'].create({
            'move_type': 'in_invoice',
            'partner_id': self.partner.id,
        })

    @classmethod
    def _load_bold_font(cls, size=36):
        """Return the DejaVu bold font at *size*, or ``None`` if unavailable.

        ``None`` makes Pillow fall back to its default font.
        """
        try:
            from PIL import ImageFont
            return ImageFont.truetype(cls._BOLD_FONT_PATH, size)
        except (ImportError, OSError):
            # Font file missing/unreadable or FreeType support not built in.
            return None

    @staticmethod
    def _render_png(text, size, origin, font=None):
        """Draw black *text* at *origin* on a white canvas; return PNG bytes."""
        img = Image.new('RGB', size, color='white')
        ImageDraw.Draw(img).text(origin, text, fill='black', font=font)
        buf = io.BytesIO()
        img.save(buf, format='PNG')
        return buf.getvalue()

    def _attach_png(self, name, png_bytes):
        """Attach *png_bytes* to ``self.move`` as an ``image/png`` attachment."""
        return self.env['ir.attachment'].create({
            'name': name,
            'datas': base64.b64encode(png_bytes),
            'res_model': 'account.move',
            'res_id': self.move.id,
            'mimetype': 'image/png',
        })

    @staticmethod
    def _parsed_fields(**overrides):
        """Build a mocked parser result: all fields ``None``, no line items."""
        fields = dict.fromkeys(
            (
                'vendor_name', 'invoice_number', 'invoice_date', 'due_date',
                'currency', 'subtotal', 'tax_total', 'total',
            ),
        )
        fields['line_items'] = []
        fields.update(overrides)
        return fields

    def test_ocr_state_default(self):
        self.assertEqual(self.move.ocr_state, 'not_requested')

    def test_action_request_ocr_no_attachment_raises(self):
        with self.assertRaises(UserError):
            self.move.action_request_ocr()

    def test_action_request_ocr_with_image(self):
        png = self._render_png(
            "TOTAL $50.00 INV-9999", (800, 120), (20, 30),
            font=self._load_bold_font(),
        )
        self._attach_png('test_invoice.png', png)

        # Mock the LLM call to avoid a real API roundtrip.
        mocked = self._parsed_fields(invoice_number='INV-9999', total=50.0)
        with patch(self._PARSER_PATH, return_value=mocked):
            self.move.action_request_ocr()

        self.assertEqual(self.move.ocr_state, 'done')
        self.assertEqual(self.move.ocr_backend, 'tesseract')
        self.assertGreater(self.move.ocr_confidence, 0)
        # An unset Odoo field reads as False rather than None, so an
        # is-not-None check could never fail; assert truthiness instead.
        self.assertTrue(self.move.ocr_extracted_data)
        # Parsed invoice_number should land on the invoice's ref field.
        self.assertEqual(self.move.ref, 'INV-9999')
        # OCR log row was created.
        self.assertEqual(len(self.move.ocr_log_ids), 1)
        log = self.move.ocr_log_ids
        self.assertEqual(log.backend, 'tesseract')
        self.assertGreater(log.raw_text_length, 0)

    def test_apply_does_not_overwrite_user_entered_ref(self):
        self.move.ref = 'USER-SET-REF'
        # Same small default-font image as before the helpers were factored out.
        png = self._render_png("INV-7777", (400, 80), (10, 30))
        self._attach_png('t.png', png)

        mocked = self._parsed_fields(invoice_number='INV-7777')
        with patch(self._PARSER_PATH, return_value=mocked):
            self.move.action_request_ocr()

        # User-entered ref must not be overwritten.
        self.assertEqual(self.move.ref, 'USER-SET-REF')

    def test_only_vendor_bills_supported(self):
        customer_invoice = self.env['account.move'].create({
            'move_type': 'out_invoice',
            'partner_id': self.partner.id,
        })
        with self.assertRaises(UserError):
            customer_invoice.action_request_ocr()
|
||||
47
fusion_accounting_ocr/tests/test_tesseract_adapter.py
Normal file
47
fusion_accounting_ocr/tests/test_tesseract_adapter.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import io
|
||||
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
from odoo.tests import tagged
|
||||
from odoo.tests.common import TransactionCase
|
||||
|
||||
from odoo.addons.fusion_accounting_ocr.services.ocr_providers.tesseract_adapter import (
|
||||
TesseractAdapter,
|
||||
)
|
||||
|
||||
|
||||
@tagged('post_install', '-at_install')
class TestTesseractAdapter(TransactionCase):
    """Checks for ``TesseractAdapter``: availability and OCR of a simple PNG."""

    def test_is_available(self):
        # The test container is expected to ship tesseract, pytesseract
        # and pdf2image, so the adapter must report itself as usable.
        available = TesseractAdapter.is_available()
        self.assertTrue(available)

    def test_extract_simple_text_image(self):
        # Render "INVOICE 12345 Total $100" onto a white canvas. A TTF font
        # is preferred for tesseract reliability; when it cannot be loaded
        # Pillow's default bitmap font is used instead.
        sample_text = "INVOICE 12345 Total $100"
        try:
            from PIL import ImageFont
            typeface = ImageFont.truetype(
                '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 36,
            )
        except Exception:
            typeface = None

        canvas = Image.new('RGB', (800, 120), color='white')
        ImageDraw.Draw(canvas).text(
            (20, 30), sample_text, fill='black', font=typeface,
        )

        with io.BytesIO() as stream:
            canvas.save(stream, format='PNG')
            payload = stream.getvalue()

        outcome = TesseractAdapter().extract(payload, mimetype='image/png')

        self.assertEqual(outcome.backend, 'tesseract')
        self.assertEqual(outcome.error, '')
        self.assertEqual(outcome.pages, 1)
        recognised = outcome.raw_text
        self.assertGreater(len(recognised), 0)
        # Tesseract should pick up the digits at minimum.
        self.assertIn('12345', recognised.replace(' ', ''))
|
||||
Reference in New Issue
Block a user