feat(fusion_accounting_ocr): pluggable OCR for vendor bills
Replaces Enterprise's account_invoice_extract with a Fusion-native pipeline: Stage 1 (text extraction): Tesseract OCRs the bill attachment via pytesseract + pdf2image. Pluggable OCRProvider adapter pattern allows future Mindee / Google Document AI / Ollama-vision backends. Stage 2 (field parsing): The fusion_accounting_ai LLMProvider reads the raw OCR text and returns structured invoice fields (vendor, invoice number, dates, amounts, line items) as JSON. Draft invoice fields are auto-populated for empty-only fields (never overwriting user-entered data). Vendor matching by name against res.partner with supplier_rank > 0. Adds: - account.move.ocr_state (selection: not_requested/pending/processing/ done/failed/manual) - account.move.ocr_raw_text, ocr_extracted_data (Json), ocr_backend, ocr_confidence - fusion.ocr.log (audit trail per OCR run) - res.company.fusion_ocr_enabled / fusion_ocr_default_backend / auto_run - /fusion/ocr/request_for_invoice JSON-RPC endpoint Backend availability detected at runtime via OCRProvider.is_available() classmethods. Tesseract 5.3.4 + pytesseract 0.3.13 + pdf2image 1.17.0 are installed in the container. Tests: 13 (TesseractAdapter availability + image OCR; flow tests for draft autofill, no-attachment guard, customer-invoice guard, ref-not- overwritten; field parser empty/clean-json/markdown-fence/bad-JSON/ provider-exception). All pass on westin-v19 OrbStack VM. Made-with: Cursor
This commit is contained in:
2
fusion_accounting_ocr/__init__.py
Normal file
2
fusion_accounting_ocr/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
from . import models
|
||||||
|
from . import controllers
|
||||||
39
fusion_accounting_ocr/__manifest__.py
Normal file
39
fusion_accounting_ocr/__manifest__.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
{
|
||||||
|
'name': 'Fusion Accounting — Invoice OCR',
|
||||||
|
'version': '19.0.1.0.0',
|
||||||
|
'category': 'Accounting/Accounting',
|
||||||
|
'summary': 'OCR for vendor bills via tesseract + LLM-driven field extraction.',
|
||||||
|
'description': """
|
||||||
|
Fusion Accounting — Invoice OCR
|
||||||
|
================================
|
||||||
|
Replaces Enterprise's account_invoice_extract with a Fusion-native pipeline:
|
||||||
|
|
||||||
|
1. Tesseract OCRs the bill attachment (PDF or image) into raw text
|
||||||
|
2. The fusion_accounting_ai LLMProvider parses the raw text into structured
|
||||||
|
fields (vendor, invoice number, dates, amounts, line items)
|
||||||
|
3. Draft invoice fields are populated for the AP user to confirm
|
||||||
|
|
||||||
|
Pluggable backend architecture: future Mindee, Google Document AI, or
|
||||||
|
Ollama-vision adapters can be dropped in alongside the default tesseract
|
||||||
|
adapter.
|
||||||
|
""",
|
||||||
|
'icon': '/fusion_accounting_ocr/static/description/icon.png',
|
||||||
|
'author': 'Westin / Fusion Suite',
|
||||||
|
'depends': [
|
||||||
|
'fusion_accounting_core',
|
||||||
|
'fusion_accounting_ai',
|
||||||
|
'account',
|
||||||
|
],
|
||||||
|
'external_dependencies': {
|
||||||
|
'python': ['pytesseract', 'pdf2image', 'PIL'],
|
||||||
|
},
|
||||||
|
'data': [
|
||||||
|
'security/ir.model.access.csv',
|
||||||
|
'views/account_move_views.xml',
|
||||||
|
'views/res_config_settings_views.xml',
|
||||||
|
],
|
||||||
|
'auto_install': False,
|
||||||
|
'installable': True,
|
||||||
|
'application': False,
|
||||||
|
'license': 'LGPL-3',
|
||||||
|
}
|
||||||
1
fusion_accounting_ocr/controllers/__init__.py
Normal file
1
fusion_accounting_ocr/controllers/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
from . import ocr_controller
|
||||||
21
fusion_accounting_ocr/controllers/ocr_controller.py
Normal file
21
fusion_accounting_ocr/controllers/ocr_controller.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
from odoo import http
|
||||||
|
from odoo.http import request
|
||||||
|
|
||||||
|
|
||||||
|
class FusionOcrController(http.Controller):
    """JSON-RPC entry points for the invoice OCR pipeline."""

    @http.route('/fusion/ocr/request_for_invoice', type='jsonrpc', auth='user')
    def request_for_invoice(self, move_id):
        """Run OCR on a single ``account.move`` and report the outcome.

        :param move_id: database id of the move (anything ``int()`` accepts).
        :return: ``{'status': 'ok', 'state', 'backend', 'confidence',
            'extracted'}`` when the OCR action ran, otherwise
            ``{'status': 'error', 'message': ...}``.

        Note that ``status == 'ok'`` only means the action completed without
        raising; callers must still inspect ``state`` (e.g. ``'failed'``).
        """
        # Reject garbage ids up front instead of letting int() blow up as an
        # opaque server error.
        try:
            move_id = int(move_id)
        except (TypeError, ValueError):
            return {
                'status': 'error',
                'message': request.env._("Invalid invoice identifier."),
            }

        # browse() never fails on unknown ids; exists() filters them out so
        # the caller gets a clear message rather than a late MissingError.
        move = request.env['account.move'].browse(move_id).exists()
        if not move:
            return {
                'status': 'error',
                'message': request.env._("Invoice not found."),
            }

        # Deliberately outside the try block: an access failure should
        # propagate as a regular RPC error, not be flattened to a string.
        move.check_access('write')
        try:
            move.action_request_ocr()
            return {
                'status': 'ok',
                'state': move.ocr_state,
                'backend': move.ocr_backend,
                'confidence': move.ocr_confidence,
                'extracted': move.ocr_extracted_data,
            }
        except Exception as e:
            # Top-level boundary: surface the message to the client.
            return {'status': 'error', 'message': str(e)}
|
||||||
4
fusion_accounting_ocr/models/__init__.py
Normal file
4
fusion_accounting_ocr/models/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
from . import fusion_ocr_log
|
||||||
|
from . import res_company
|
||||||
|
from . import res_config_settings
|
||||||
|
from . import account_move
|
||||||
180
fusion_accounting_ocr/models/account_move.py
Normal file
180
fusion_accounting_ocr/models/account_move.py
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
"""account.move OCR extension.
|
||||||
|
|
||||||
|
Adds an OCR pipeline triggered manually (or, optionally, automatically when
|
||||||
|
a PDF/image is attached). Stage 1 is tesseract text extraction; stage 2 is
|
||||||
|
LLM field parsing through the existing fusion_accounting_ai adapter stack.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from odoo import _, fields, models
|
||||||
|
from odoo.exceptions import UserError
|
||||||
|
|
||||||
|
from ..services.ocr_providers.tesseract_adapter import TesseractAdapter
|
||||||
|
from ..services.ocr_providers.manual_adapter import ManualAdapter
|
||||||
|
from ..services.invoice_field_parser import parse_invoice_fields
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
SUPPORTED_MIMETYPES = (
|
||||||
|
'application/pdf', 'image/png', 'image/jpeg', 'image/jpg',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AccountMove(models.Model):
    """OCR extension of ``account.move`` for vendor bills.

    Stage 1 extracts raw text from the latest supported attachment through an
    OCR provider; stage 2 hands that text to ``parse_invoice_fields`` and
    copies the parsed values onto still-empty fields of a draft bill.
    """

    _inherit = 'account.move'

    ocr_state = fields.Selection(
        [
            ('not_requested', 'Not Requested'),
            ('pending', 'Pending'),
            ('processing', 'Processing'),
            ('done', 'Done'),
            ('failed', 'Failed'),
            ('manual', 'Manual Entry'),
        ],
        default='not_requested',
        copy=False,
        tracking=True,
    )

    ocr_raw_text = fields.Text(
        string='OCR Raw Text', readonly=True, copy=False,
        help="Raw text extracted by the OCR backend.",
    )
    ocr_extracted_data = fields.Json(
        string='OCR Extracted Fields', readonly=True, copy=False,
        help="Structured invoice fields parsed from the OCR text by the LLM.",
    )
    ocr_backend = fields.Char(string='OCR Backend Used', readonly=True, copy=False)
    ocr_confidence = fields.Float(string='OCR Confidence', readonly=True, copy=False)
    ocr_log_ids = fields.One2many('fusion.ocr.log', 'move_id', string='OCR Runs')

    def action_request_ocr(self):
        """Run OCR on the most recent supported attachment of each move.

        :raises UserError: if a move is not a vendor bill/refund, or has no
            attachment whose mimetype is in ``SUPPORTED_MIMETYPES``.
        :return: ``True`` (the per-move outcome is stored in ``ocr_state``).
        """
        for move in self:
            if move.move_type not in ('in_invoice', 'in_refund'):
                raise UserError(_("OCR currently supports vendor bills only."))
            attachment = self.env['ir.attachment'].sudo().search(
                [
                    ('res_model', '=', 'account.move'),
                    ('res_id', '=', move.id),
                    ('mimetype', 'in', SUPPORTED_MIMETYPES),
                ],
                # Attachments created close together can share the same
                # create_date; the id tie-breaker keeps "most recent"
                # deterministic.
                order='create_date desc, id desc',
                limit=1,
            )
            if not attachment:
                raise UserError(
                    _("No PDF or image attachment found on %s") % (move.name or move.id)
                )
            move._fusion_run_ocr(attachment)
        return True

    def _fusion_run_ocr(self, attachment):
        """Run both OCR stages for this move on ``attachment``.

        Updates ``ocr_state`` and the other ``ocr_*`` fields, writes a
        ``fusion.ocr.log`` row and posts a chatter message.

        :param attachment: ``ir.attachment`` record holding the PDF/image.
        :return: ``True`` when the run reached the ``done`` state, ``False``
            when it ended in ``manual`` or ``failed``.
        """
        self.ensure_one()
        self.ocr_state = 'processing'

        backend_name = (
            self.company_id.fusion_ocr_default_backend
            if 'fusion_ocr_default_backend' in self.company_id._fields
            else 'tesseract'
        )
        provider = self._fusion_get_ocr_provider(backend_name)
        # _fusion_get_ocr_provider() always returns an adapter, so the manual
        # adapter is the real "no OCR backend" signal. Without this check a
        # manual run would be reported as a successful 'done' with no text.
        if not provider or isinstance(provider, ManualAdapter):
            self.ocr_state = 'manual'
            self.message_post(
                body=_("No OCR backend available; falling back to manual entry.")
            )
            return False

        try:
            data = base64.b64decode(attachment.datas)
            result = provider.extract(
                data, mimetype=attachment.mimetype or 'application/pdf'
            )

            self.write({
                'ocr_raw_text': result.raw_text,
                'ocr_backend': result.backend,
                'ocr_confidence': result.confidence,
            })
            self.env['fusion.ocr.log'].sudo().create({
                'move_id': self.id,
                'backend': result.backend,
                'confidence': result.confidence,
                'raw_text_length': len(result.raw_text or ''),
                'pages': result.pages,
                'error': result.error,
            })

            if not result.raw_text and result.error:
                self.ocr_state = 'failed'
                self.message_post(body=_("OCR failed: %s") % result.error)
                return False

            parsed = parse_invoice_fields(self.env, result.raw_text)
            self.ocr_extracted_data = parsed
            self.ocr_state = 'done'

            self._fusion_apply_ocr_fields(parsed)
            self.message_post(
                body=_("OCR complete: %s confidence %.0f%%") % (
                    result.backend, (result.confidence or 0) * 100,
                )
            )
            return True
        except Exception as e:
            # Boundary of the OCR run: record the failure on the move instead
            # of propagating it to the caller.
            _logger.exception("OCR run failed for move %s", self.id)
            self.ocr_state = 'failed'
            self.message_post(body=_("OCR error: %s") % e)
            return False

    def _fusion_get_ocr_provider(self, backend_name):
        """Return an OCR adapter instance for ``backend_name``.

        Falls back to tesseract when it is usable, and to the manual adapter
        otherwise, so the return value is never empty.
        """
        if backend_name == 'tesseract' and TesseractAdapter.is_available():
            return TesseractAdapter()
        if backend_name == 'manual':
            return ManualAdapter()
        # Future adapters (mindee, google_doc_ai, ollama_vision) plug in
        # here. Fall back to whichever adapter is actually usable.
        if TesseractAdapter.is_available():
            return TesseractAdapter()
        return ManualAdapter()

    def _fusion_parse_ocr_date(self, value):
        """Convert an LLM-provided date into a ``date``, or ``None``.

        The LLM is asked for ISO ``YYYY-MM-DD`` but may return anything; an
        unparseable value is skipped here so it cannot make the later
        ``write()`` raise and mark the whole OCR run as failed.
        """
        try:
            return fields.Date.to_date(value)
        except (TypeError, ValueError):
            _logger.info("Ignoring unparseable OCR date %r", value)
            return None

    def _fusion_apply_ocr_fields(self, parsed):
        """Apply parsed fields to a draft invoice without overwriting any
        user-entered data. No-op on posted/cancelled invoices.

        :param parsed: dict produced by ``parse_invoice_fields``.
        """
        self.ensure_one()
        if self.state != 'draft':
            return

        vals = {}
        if not self.invoice_date:
            invoice_date = self._fusion_parse_ocr_date(parsed.get('invoice_date'))
            if invoice_date:
                vals['invoice_date'] = invoice_date
        if not self.invoice_date_due:
            due_date = self._fusion_parse_ocr_date(parsed.get('due_date'))
            if due_date:
                vals['invoice_date_due'] = due_date

        invoice_number = parsed.get('invoice_number')
        if invoice_number and not self.ref:
            # The LLM may emit a bare number; ``ref`` is a text field.
            vals['ref'] = str(invoice_number).strip()

        # Vendor: best-effort name match against existing supplier partners.
        # Never auto-create a partner; AP user confirms ambiguous matches.
        vendor_name = parsed.get('vendor_name')
        if isinstance(vendor_name, str) and vendor_name.strip() and not self.partner_id:
            partner = self.env['res.partner'].sudo().search(
                [
                    ('name', '=ilike', vendor_name.strip()),
                    ('supplier_rank', '>', 0),
                    # The search runs as sudo, so restrict it explicitly to
                    # partners shared across companies or owned by this
                    # bill's company.
                    ('company_id', 'in', [False, self.company_id.id]),
                ],
                limit=1,
            )
            if partner:
                vals['partner_id'] = partner.id

        if vals:
            self.write(vals)
|
||||||
17
fusion_accounting_ocr/models/fusion_ocr_log.py
Normal file
17
fusion_accounting_ocr/models/fusion_ocr_log.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
from odoo import fields, models
|
||||||
|
|
||||||
|
|
||||||
|
class FusionOcrLog(models.Model):
    """Audit-trail row written once per OCR run on an ``account.move``."""

    _name = 'fusion.ocr.log'
    _description = 'Fusion OCR Run Log'
    # Several runs can share the same create_date; the id tie-breaker keeps
    # "newest first" deterministic.
    _order = 'create_date desc, id desc'

    # Move the run belongs to; log rows are removed together with the move.
    move_id = fields.Many2one(
        'account.move', required=True, ondelete='cascade', index=True,
    )
    # Name of the OCR backend that produced the result.
    backend = fields.Char(required=True)
    confidence = fields.Float()
    # Length of the extracted text (the text itself lives on the move).
    raw_text_length = fields.Integer()
    pages = fields.Integer()
    # Error message reported by the backend, if any.
    error = fields.Text()
    create_date = fields.Datetime(readonly=True)
|
||||||
26
fusion_accounting_ocr/models/res_company.py
Normal file
26
fusion_accounting_ocr/models/res_company.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from odoo import fields, models
|
||||||
|
|
||||||
|
|
||||||
|
class ResCompany(models.Model):
    """Per-company configuration of the invoice OCR pipeline."""

    _inherit = 'res.company'

    # Master switch for the OCR feature.
    fusion_ocr_enabled = fields.Boolean(
        default=False,
        string='Enable Invoice OCR',
        help=(
            "When enabled, vendor bill attachments can be OCR'd via the "
            "configured backend."
        ),
    )
    # Backend tried first when OCR is requested on a bill.
    fusion_ocr_default_backend = fields.Selection(
        selection=[
            ('tesseract', 'Tesseract (local, free)'),
            ('manual', 'Manual entry only'),
        ],
        string='Default OCR Backend',
        default='tesseract',
    )
    # Opt-in flag for running OCR as soon as a document is attached.
    fusion_ocr_auto_run = fields.Boolean(
        default=False,
        string='Auto-run OCR on attachment',
        help=(
            "When enabled, OCR runs automatically when a PDF/image is "
            "attached to a vendor bill."
        ),
    )
|
||||||
15
fusion_accounting_ocr/models/res_config_settings.py
Normal file
15
fusion_accounting_ocr/models/res_config_settings.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from odoo import fields, models
|
||||||
|
|
||||||
|
|
||||||
|
class ResConfigSettings(models.TransientModel):
    """Expose the company-level OCR options in the settings form.

    Each field is an editable ``related`` proxy onto the matching
    ``res.company`` field of the settings' ``company_id``.
    """

    _inherit = 'res.config.settings'

    fusion_ocr_enabled = fields.Boolean(
        readonly=False,
        related='company_id.fusion_ocr_enabled',
    )
    fusion_ocr_default_backend = fields.Selection(
        readonly=False,
        related='company_id.fusion_ocr_default_backend',
    )
    fusion_ocr_auto_run = fields.Boolean(
        readonly=False,
        related='company_id.fusion_ocr_auto_run',
    )
|
||||||
3
fusion_accounting_ocr/security/ir.model.access.csv
Normal file
3
fusion_accounting_ocr/security/ir.model.access.csv
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
id,name,model_id:id,group_id:id,perm_read,perm_write,perm_create,perm_unlink
|
||||||
|
access_fusion_ocr_log_user,fusion.ocr.log.user,model_fusion_ocr_log,base.group_user,1,0,0,0
|
||||||
|
access_fusion_ocr_log_manager,fusion.ocr.log.manager,model_fusion_ocr_log,account.group_account_manager,1,1,1,1
|
||||||
|
3
fusion_accounting_ocr/services/__init__.py
Normal file
3
fusion_accounting_ocr/services/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from . import ocr_providers
|
||||||
|
from . import attachment_to_image
|
||||||
|
from . import invoice_field_parser
|
||||||
43
fusion_accounting_ocr/services/attachment_to_image.py
Normal file
43
fusion_accounting_ocr/services/attachment_to_image.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
"""Helper: turn an ir.attachment into a list of PIL.Image pages.
|
||||||
|
|
||||||
|
Kept separate from the adapters so future backends (Ollama-vision, Mindee)
|
||||||
|
that want PIL images directly don't have to re-implement the PDF rendering.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def attachment_to_pages(attachment):
    """Render an ir.attachment as a list of PIL.Image pages.

    PDFs (detected by mimetype or by the ``%PDF`` signature) are rasterised
    at 200 dpi, one image per page; anything else is opened as a single
    image.

    Returns ``[]`` on any failure - missing libraries, empty attachment,
    undecodable payload, or a rendering error - so the caller can treat it
    as "no pages".
    """
    try:
        from PIL import Image
        from pdf2image import convert_from_bytes
    except ImportError as exc:
        _logger.warning("attachment_to_pages requires PIL + pdf2image: %s", exc)
        return []

    if not (attachment and attachment.datas):
        return []

    try:
        payload = base64.b64decode(attachment.datas)
    except Exception as exc:
        _logger.warning("Could not decode attachment %s: %s", attachment.id, exc)
        return []

    declared_type = attachment.mimetype or ''
    looks_like_pdf = declared_type == 'application/pdf' or payload[:4] == b'%PDF'
    try:
        if not looks_like_pdf:
            return [Image.open(io.BytesIO(payload))]
        return convert_from_bytes(payload, dpi=200)
    except Exception as exc:
        _logger.warning("Could not render attachment %s: %s", attachment.id, exc)
        return []
|
||||||
150
fusion_accounting_ocr/services/invoice_field_parser.py
Normal file
150
fusion_accounting_ocr/services/invoice_field_parser.py
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
"""Stage-2 of the OCR pipeline: parse raw OCR text into structured invoice
|
||||||
|
fields via the configured LLM provider.
|
||||||
|
|
||||||
|
Mirrors the pattern in fusion_accounting_followup/services/followup_text_generator.py:
|
||||||
|
look up an adapter by ir.config_parameter, fall back gracefully when no
|
||||||
|
provider is configured, and never let an LLM hiccup nuke the OCR result.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
SYSTEM_PROMPT = (
|
||||||
|
"You are an invoice field extraction assistant. You read raw OCR text "
|
||||||
|
"from vendor bills and return a strict JSON object with the requested "
|
||||||
|
"fields. You never include commentary or markdown fences. When a field "
|
||||||
|
"cannot be determined from the text you return null for that field."
|
||||||
|
)
|
||||||
|
|
||||||
|
USER_PROMPT = """Given the raw OCR text of a vendor bill, return a JSON object
|
||||||
|
with these fields (use null when unclear):
|
||||||
|
|
||||||
|
{{
|
||||||
|
"vendor_name": <string, the seller/vendor company name>,
|
||||||
|
"invoice_number": <string, the bill or invoice reference number>,
|
||||||
|
"invoice_date": <string, ISO format YYYY-MM-DD>,
|
||||||
|
"due_date": <string or null, ISO format YYYY-MM-DD>,
|
||||||
|
"currency": <string, ISO 4217 code like CAD/USD/EUR>,
|
||||||
|
"subtotal": <number or null>,
|
||||||
|
"tax_total": <number or null>,
|
||||||
|
"total": <number, the grand total amount due>,
|
||||||
|
"line_items": [
|
||||||
|
{{"description": <string>, "quantity": <number or null>,
|
||||||
|
"unit_price": <number or null>, "amount": <number or null>}}
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
|
||||||
|
Return ONLY valid JSON, no commentary, no markdown fences.
|
||||||
|
|
||||||
|
Raw OCR text:
|
||||||
|
---
|
||||||
|
{text}
|
||||||
|
---
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def parse_invoice_fields(env, raw_text: str, *, provider=None) -> dict:
    """Use the configured LLM provider to extract structured invoice fields.

    :param env: Odoo environment, used to look up the provider when none is
        passed explicitly.
    :param raw_text: raw OCR text; only the first 12000 characters are sent.
    :param provider: optional object exposing ``complete(system=, messages=,
        max_tokens=, temperature=)``; mainly useful for tests.
    :return: a dict with exactly the keys of ``_empty_result()``. On any
        failure (no provider, bad JSON, network error, etc.) every scalar is
        ``None`` and ``line_items`` is ``[]`` so the OCR raw text is still
        preserved for the AP user. This function never raises.
    """
    if not raw_text or not raw_text.strip():
        return _empty_result()

    if provider is None:
        provider = _get_provider(env)
    if provider is None:
        _logger.info(
            "No LLM provider configured for OCR field parsing; "
            "raw OCR text preserved, fields left empty."
        )
        return _empty_result()

    try:
        truncated = raw_text[:12000]
        response = provider.complete(
            system=SYSTEM_PROMPT,
            messages=[{'role': 'user', 'content': USER_PROMPT.format(text=truncated)}],
            max_tokens=1000,
            temperature=0.1,
        )
    except Exception as e:
        # Any provider hiccup (network, auth, quota...) degrades to "no
        # fields" rather than failing the OCR run.
        _logger.warning("OCR field parsing failed: %s", e)
        return _empty_result()

    content = response.get('content') if isinstance(response, dict) else response
    if not content:
        return _empty_result()
    if not isinstance(content, str):
        _logger.warning(
            "OCR field parsing failed: unexpected LLM content type %s",
            type(content).__name__,
        )
        return _empty_result()

    try:
        parsed = _extract_json_object(content)
    except json.JSONDecodeError as e:
        _logger.warning("LLM returned non-JSON for OCR field parsing: %s", e)
        return _empty_result()
    if not isinstance(parsed, dict):
        _logger.warning(
            "LLM returned a JSON %s instead of an object for OCR field parsing",
            type(parsed).__name__,
        )
        return _empty_result()

    # Start from the canonical schema so the key set always matches
    # _empty_result(), whatever extra/missing keys the LLM produced.
    result = _empty_result()
    for key in result:
        if key != 'line_items':
            result[key] = parsed.get(key)
    line_items = parsed.get('line_items')
    result['line_items'] = line_items if isinstance(line_items, list) else []
    return result


def _extract_json_object(content):
    """Decode the JSON payload embedded in an LLM reply.

    Handles a bare JSON document, a fenced block (three backticks, optionally
    tagged ``json``) and, as a last resort, JSON surrounded by commentary by
    retrying on the outermost ``{...}`` span.

    :raises json.JSONDecodeError: when no decodable JSON is found.
    """
    text = content.strip()
    # LLMs sometimes wrap JSON in ```json ... ``` despite instructions.
    if text.startswith('```'):
        text = text.split('```', 2)[1]
        if text.startswith('json'):
            text = text[4:]
        text = text.rsplit('```', 1)[0]
    text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find('{'), text.rfind('}')
        if start == -1 or end <= start:
            raise
        return json.loads(text[start:end + 1])
|
||||||
|
|
||||||
|
|
||||||
|
def _empty_result():
|
||||||
|
return {
|
||||||
|
'vendor_name': None,
|
||||||
|
'invoice_number': None,
|
||||||
|
'invoice_date': None,
|
||||||
|
'due_date': None,
|
||||||
|
'currency': None,
|
||||||
|
'subtotal': None,
|
||||||
|
'tax_total': None,
|
||||||
|
'total': None,
|
||||||
|
'line_items': [],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_provider(env):
|
||||||
|
"""Look up the LLM adapter via ir.config_parameter.
|
||||||
|
|
||||||
|
Honours a feature-specific override
|
||||||
|
(``fusion_accounting.provider.ocr_field_parsing``) before falling back
|
||||||
|
to the suite-wide default (``fusion_accounting.provider.default``).
|
||||||
|
Returns None when no adapter is configured/importable.
|
||||||
|
"""
|
||||||
|
param = env['ir.config_parameter'].sudo()
|
||||||
|
name = param.get_param('fusion_accounting.provider.ocr_field_parsing')
|
||||||
|
if not name:
|
||||||
|
name = param.get_param('fusion_accounting.provider.default')
|
||||||
|
if not name:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
from odoo.addons.fusion_accounting_ai.services.adapters.openai_adapter import OpenAIAdapter
|
||||||
|
from odoo.addons.fusion_accounting_ai.services.adapters.claude import ClaudeAdapter
|
||||||
|
except ImportError:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
if name.startswith('openai'):
|
||||||
|
return OpenAIAdapter(env)
|
||||||
|
if name.startswith('claude'):
|
||||||
|
return ClaudeAdapter(env)
|
||||||
|
except Exception as e:
|
||||||
|
_logger.warning("OCR field parser could not instantiate %s: %s", name, e)
|
||||||
|
return None
|
||||||
|
return None
|
||||||
3
fusion_accounting_ocr/services/ocr_providers/__init__.py
Normal file
3
fusion_accounting_ocr/services/ocr_providers/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from . import base
|
||||||
|
from . import tesseract_adapter
|
||||||
|
from . import manual_adapter
|
||||||
40
fusion_accounting_ocr/services/ocr_providers/base.py
Normal file
40
fusion_accounting_ocr/services/ocr_providers/base.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
"""OCRProvider contract - every backend must conform.
|
||||||
|
|
||||||
|
Mirrors the LLMProvider pattern in fusion_accounting_ai. Future adapters
|
||||||
|
(Mindee, Google Document AI, Ollama-vision) drop in alongside the default
|
||||||
|
tesseract adapter without touching account.move.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class OCRResult:
    """Outcome of one OCR run, as returned by ``OCRProvider.extract``."""

    # Extracted text; empty when nothing could be read.
    raw_text: str = ''
    # Backend-reported confidence on a 0.0–1.0 scale.
    confidence: float = 0.0
    # Number of pages processed.
    pages: int = 0
    # Name of the backend that produced this result.
    backend: str = ''
    # Error message; empty when the run succeeded.
    error: str = ''
    # Free-form backend-specific extras (fresh dict per instance).
    metadata: dict = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
class OCRProvider(ABC):
    """Contract every OCR backend implements.

    Concrete adapters override :meth:`extract` and, when they rely on
    optional runtime dependencies, :meth:`is_available`.
    """

    # Short identifier of the backend; overridden by each adapter.
    name: str = 'base'

    @abstractmethod
    def extract(self, image_or_pdf_bytes: bytes, *, mimetype: str = 'application/pdf') -> OCRResult:
        """Turn raw document bytes into an :class:`OCRResult`.

        ``mimetype`` is a hint for choosing between PDF rendering (poppler)
        and image decoding (PIL). Implementations should still inspect the
        byte signature for safety.
        """
        ...

    @classmethod
    def is_available(cls) -> bool:
        """Tell whether the backend's runtime dependencies are present.

        The base implementation always answers ``True``.
        """
        return True
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
"""Manual fallback adapter - no real OCR, just marks the document as
|
||||||
|
'awaiting manual entry'. Used when no real OCR backend is available
|
||||||
|
or when the user explicitly disables OCR.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base import OCRProvider, OCRResult
|
||||||
|
|
||||||
|
|
||||||
|
class ManualAdapter(OCRProvider):
    """Placeholder backend that performs no OCR at all."""

    name = 'manual'

    def extract(self, image_or_pdf_bytes, *, mimetype='application/pdf'):
        """Ignore the input and return an empty result tagged ``manual``."""
        empty = OCRResult(
            backend='manual',
            raw_text='',
            pages=0,
            confidence=0.0,
        )
        return empty
|
||||||
@@ -0,0 +1,71 @@
|
|||||||
|
"""Tesseract OCR adapter.
|
||||||
|
|
||||||
|
Uses the system tesseract binary via pytesseract, with poppler-backed
|
||||||
|
PDF rendering via pdf2image. Inside the container these are pre-installed:
|
||||||
|
- tesseract-ocr 5.3.4
|
||||||
|
- pytesseract 0.3.13
|
||||||
|
- pdf2image 1.17.0
|
||||||
|
- poppler-utils
|
||||||
|
"""
|
||||||
|
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from .base import OCRProvider, OCRResult
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TesseractAdapter(OCRProvider):
    """OCR backend driving the tesseract binary through pytesseract."""

    name = 'tesseract'

    # Leading bytes of the raster formats accepted as attachments (PNG, JPEG).
    # Used to overrule a missing or wrong mimetype hint.
    _IMAGE_SIGNATURES = (b'\x89PNG\r\n\x1a\n', b'\xff\xd8\xff')

    @classmethod
    def is_available(cls) -> bool:
        """Return True when pytesseract, pdf2image and PIL import and
        pytesseract can report a tesseract version."""
        try:
            import pytesseract
            from pdf2image import convert_from_bytes  # noqa: F401
            from PIL import Image  # noqa: F401
            pytesseract.get_tesseract_version()
            return True
        except Exception as e:
            _logger.debug("TesseractAdapter not available: %s", e)
            return False

    def extract(self, image_or_pdf_bytes, *, mimetype='application/pdf'):
        """OCR a PDF or image and return an :class:`OCRResult`.

        The byte signature wins over ``mimetype``: callers default the hint
        to ``application/pdf``, which would otherwise send a PNG/JPEG with a
        missing mimetype through the PDF renderer.

        Never raises: any failure, including missing libraries, is reported
        through ``OCRResult.error`` with empty text.
        """
        try:
            # Imported inside the try block so a missing dependency becomes
            # an error result instead of an exception.
            import pytesseract
            from pdf2image import convert_from_bytes
            from PIL import Image

            head = bytes(image_or_pdf_bytes[:8])
            if head.startswith(b'%PDF'):
                is_pdf = True
            elif head.startswith(self._IMAGE_SIGNATURES):
                is_pdf = False
            else:
                # Unknown signature: fall back to the caller's hint.
                is_pdf = mimetype == 'application/pdf'

            if is_pdf:
                pages = convert_from_bytes(image_or_pdf_bytes, dpi=200)
                texts = [pytesseract.image_to_string(page) for page in pages]
                page_count = len(pages)
            else:
                # Context manager releases the decoder once OCR is done.
                with Image.open(io.BytesIO(image_or_pdf_bytes)) as img:
                    texts = [pytesseract.image_to_string(img)]
                page_count = 1
            # Pages are separated by a form feed on its own line.
            full_text = '\n\f\n'.join(texts)

            # Heuristic confidence - tesseract has a per-word conf in
            # image_to_data, but a length proxy is fine for routing
            # decisions. Future: use pytesseract.image_to_data for a real
            # average word-level confidence.
            conf = min(1.0, len(full_text) / 1000.0)
            return OCRResult(
                raw_text=full_text,
                confidence=conf,
                pages=page_count,
                backend='tesseract',
            )
        except Exception as e:
            _logger.warning("Tesseract OCR failed: %s", e)
            return OCRResult(
                raw_text='', confidence=0.0, pages=0,
                backend='tesseract', error=str(e),
            )
|
||||||
BIN
fusion_accounting_ocr/static/description/icon.png
Normal file
BIN
fusion_accounting_ocr/static/description/icon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 72 KiB |
3
fusion_accounting_ocr/tests/__init__.py
Normal file
3
fusion_accounting_ocr/tests/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from . import test_tesseract_adapter
|
||||||
|
from . import test_invoice_ocr_flow
|
||||||
|
from . import test_field_parser
|
||||||
74
fusion_accounting_ocr/tests/test_field_parser.py
Normal file
74
fusion_accounting_ocr/tests/test_field_parser.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
from odoo.tests import tagged
|
||||||
|
from odoo.tests.common import TransactionCase
|
||||||
|
|
||||||
|
from odoo.addons.fusion_accounting_ocr.services.invoice_field_parser import (
|
||||||
|
parse_invoice_fields,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@tagged('post_install', '-at_install')
|
||||||
|
class TestFieldParser(TransactionCase):
|
||||||
|
|
||||||
|
def test_parser_handles_empty_text(self):
|
||||||
|
result = parse_invoice_fields(self.env, '')
|
||||||
|
self.assertIsNone(result['total'])
|
||||||
|
self.assertEqual(result['line_items'], [])
|
||||||
|
|
||||||
|
def test_parser_handles_no_provider_gracefully(self):
|
||||||
|
# Without an LLM provider configured, parse should return an empty
|
||||||
|
# result dict rather than crashing.
|
||||||
|
result = parse_invoice_fields(self.env, 'INVOICE 12345 Total $100')
|
||||||
|
self.assertIn('total', result)
|
||||||
|
self.assertIn('line_items', result)
|
||||||
|
self.assertIsInstance(result['line_items'], list)
|
||||||
|
|
||||||
|
def test_parser_consumes_clean_json(self):
|
||||||
|
provider = MagicMock()
|
||||||
|
provider.complete.return_value = {
|
||||||
|
'content': (
|
||||||
|
'{"vendor_name": "Acme Co", "invoice_number": "INV-1",'
|
||||||
|
' "invoice_date": "2026-04-20", "due_date": null,'
|
||||||
|
' "currency": "CAD", "subtotal": 90.0, "tax_total": 10.0,'
|
||||||
|
' "total": 100.0, "line_items": ['
|
||||||
|
'{"description": "Widget", "quantity": 1, "unit_price": 90.0,'
|
||||||
|
' "amount": 90.0}]}'
|
||||||
|
),
|
||||||
|
}
|
||||||
|
result = parse_invoice_fields(self.env, 'raw text', provider=provider)
|
||||||
|
self.assertEqual(result['vendor_name'], 'Acme Co')
|
||||||
|
self.assertEqual(result['invoice_number'], 'INV-1')
|
||||||
|
self.assertEqual(result['total'], 100.0)
|
||||||
|
self.assertEqual(len(result['line_items']), 1)
|
||||||
|
self.assertEqual(result['line_items'][0]['description'], 'Widget')
|
||||||
|
|
||||||
|
def test_parser_strips_markdown_fences(self):
|
||||||
|
provider = MagicMock()
|
||||||
|
provider.complete.return_value = {
|
||||||
|
'content': (
|
||||||
|
'```json\n'
|
||||||
|
'{"vendor_name": "Beta Ltd", "invoice_number": "B-2",'
|
||||||
|
' "invoice_date": null, "due_date": null, "currency": null,'
|
||||||
|
' "subtotal": null, "tax_total": null, "total": 5.5,'
|
||||||
|
' "line_items": []}\n'
|
||||||
|
'```'
|
||||||
|
),
|
||||||
|
}
|
||||||
|
result = parse_invoice_fields(self.env, 'raw text', provider=provider)
|
||||||
|
self.assertEqual(result['vendor_name'], 'Beta Ltd')
|
||||||
|
self.assertEqual(result['total'], 5.5)
|
||||||
|
|
||||||
|
def test_parser_returns_empty_on_invalid_json(self):
|
||||||
|
provider = MagicMock()
|
||||||
|
provider.complete.return_value = {'content': 'not json at all'}
|
||||||
|
result = parse_invoice_fields(self.env, 'raw text', provider=provider)
|
||||||
|
self.assertIsNone(result['total'])
|
||||||
|
self.assertEqual(result['line_items'], [])
|
||||||
|
|
||||||
|
def test_parser_returns_empty_on_provider_exception(self):
    """A provider that raises yields a result with no total and no lines."""
    # ``complete`` raises instead of returning a completion.
    stub = MagicMock(**{'complete.side_effect': RuntimeError('boom')})

    fields = parse_invoice_fields(self.env, 'raw text', provider=stub)

    self.assertIsNone(fields['total'])
    self.assertEqual(fields['line_items'], [])
|
||||||
117
fusion_accounting_ocr/tests/test_invoice_ocr_flow.py
Normal file
117
fusion_accounting_ocr/tests/test_invoice_ocr_flow.py
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
import base64
|
||||||
|
import io
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from PIL import Image, ImageDraw
|
||||||
|
|
||||||
|
from odoo.exceptions import UserError
|
||||||
|
from odoo.tests import tagged
|
||||||
|
from odoo.tests.common import TransactionCase
|
||||||
|
|
||||||
|
|
||||||
|
@tagged('post_install', '-at_install')
class TestInvoiceOcrFlow(TransactionCase):
    """Flow tests for ``account.move.action_request_ocr``.

    Each test starts from a vendor bill (``move_type='in_invoice'``) for a
    partner with ``supplier_rank`` 1. Tests that run OCR attach a generated
    PNG to the bill and patch ``parse_invoice_fields`` so no LLM is called.
    """

    # Dotted path of the field parser as patched by these tests.
    _PARSER_PATH = (
        'odoo.addons.fusion_accounting_ocr.models.account_move'
        '.parse_invoice_fields'
    )

    def setUp(self):
        """Create the vendor and the vendor bill shared by the tests."""
        super().setUp()
        self.partner = self.env['res.partner'].create({
            'name': 'Test Vendor',
            'supplier_rank': 1,
        })
        self.move = self.env['account.move'].create({
            'move_type': 'in_invoice',
            'partner_id': self.partner.id,
        })

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _parsed_fields(**overrides):
        """Return a parser result with every field empty except *overrides*."""
        fields = {
            'vendor_name': None,
            'invoice_number': None,
            'invoice_date': None,
            'due_date': None,
            'currency': None,
            'subtotal': None,
            'tax_total': None,
            'total': None,
            'line_items': [],
        }
        fields.update(overrides)
        return fields

    @staticmethod
    def _load_bold_font(size):
        """Return the DejaVu Sans Bold TTF at *size*, or ``None``.

        Loading the font is best-effort: any failure falls back to
        ``None``, which makes ``ImageDraw.text`` use its default font.
        """
        try:
            from PIL import ImageFont
            return ImageFont.truetype(
                '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', size,
            )
        except Exception:
            return None

    def _attach_png(self, name, text, size, origin, font=None):
        """Attach to ``self.move`` a white PNG with *text* drawn in black.

        :param name: attachment file name
        :param text: text rendered onto the image
        :param size: ``(width, height)`` of the image in pixels
        :param origin: ``(x, y)`` position where the text is drawn
        :param font: optional PIL font; ``None`` uses the default font
        :return: the created ``ir.attachment`` record
        """
        img = Image.new('RGB', size, color='white')
        ImageDraw.Draw(img).text(origin, text, fill='black', font=font)
        buf = io.BytesIO()
        img.save(buf, format='PNG')
        return self.env['ir.attachment'].create({
            'name': name,
            'datas': base64.b64encode(buf.getvalue()),
            'res_model': 'account.move',
            'res_id': self.move.id,
            'mimetype': 'image/png',
        })

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_ocr_state_default(self):
        """A newly created vendor bill has ``ocr_state`` 'not_requested'."""
        self.assertEqual(self.move.ocr_state, 'not_requested')

    def test_action_request_ocr_no_attachment_raises(self):
        """Requesting OCR on a bill without attachment raises UserError."""
        with self.assertRaises(UserError):
            self.move.action_request_ocr()

    def test_action_request_ocr_with_image(self):
        """OCR on an attached PNG fills the bill and writes one log row."""
        self._attach_png(
            'test_invoice.png',
            "TOTAL $50.00 INV-9999",
            size=(800, 120),
            origin=(20, 30),
            font=self._load_bold_font(36),
        )

        # Mock the LLM call to avoid a real API roundtrip.
        parsed = self._parsed_fields(invoice_number='INV-9999', total=50.0)
        with patch(self._PARSER_PATH, return_value=parsed):
            self.move.action_request_ocr()

        self.assertEqual(self.move.ocr_state, 'done')
        self.assertEqual(self.move.ocr_backend, 'tesseract')
        self.assertGreater(self.move.ocr_confidence, 0)
        # Odoo record fields read as False (never None) when empty, so an
        # assertIsNotNone here could not fail; check truthiness instead.
        self.assertTrue(self.move.ocr_extracted_data)
        # Parsed invoice_number should land on the invoice's ref field.
        self.assertEqual(self.move.ref, 'INV-9999')
        # OCR log row was created.
        self.assertEqual(len(self.move.ocr_log_ids), 1)
        log = self.move.ocr_log_ids
        self.assertEqual(log.backend, 'tesseract')
        self.assertGreater(log.raw_text_length, 0)

    def test_apply_does_not_overwrite_user_entered_ref(self):
        """A ``ref`` already set on the bill survives the OCR run."""
        self.move.ref = 'USER-SET-REF'
        self._attach_png('t.png', "INV-7777", size=(400, 80), origin=(10, 30))

        parsed = self._parsed_fields(invoice_number='INV-7777')
        with patch(self._PARSER_PATH, return_value=parsed):
            self.move.action_request_ocr()

        # User-entered ref must not be overwritten.
        self.assertEqual(self.move.ref, 'USER-SET-REF')

    def test_only_vendor_bills_supported(self):
        """Requesting OCR on a customer invoice raises UserError."""
        customer_invoice = self.env['account.move'].create({
            'move_type': 'out_invoice',
            'partner_id': self.partner.id,
        })
        with self.assertRaises(UserError):
            customer_invoice.action_request_ocr()
|
||||||
47
fusion_accounting_ocr/tests/test_tesseract_adapter.py
Normal file
47
fusion_accounting_ocr/tests/test_tesseract_adapter.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
import io
|
||||||
|
|
||||||
|
from PIL import Image, ImageDraw
|
||||||
|
|
||||||
|
from odoo.tests import tagged
|
||||||
|
from odoo.tests.common import TransactionCase
|
||||||
|
|
||||||
|
from odoo.addons.fusion_accounting_ocr.services.ocr_providers.tesseract_adapter import (
|
||||||
|
TesseractAdapter,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@tagged('post_install', '-at_install')
class TestTesseractAdapter(TransactionCase):
    """Checks ``TesseractAdapter`` availability and a basic image extraction."""

    def test_is_available(self):
        """The adapter reports itself as available."""
        # In our container tesseract + pytesseract + pdf2image are pre-installed.
        available = TesseractAdapter.is_available()
        self.assertTrue(available)

    def test_extract_simple_text_image(self):
        """Extracting a generated one-line PNG returns its digits."""
        # Render "INVOICE 12345 Total $100" on an 800x120 white canvas.
        # A TTF font is preferred for tesseract reliability; if it cannot
        # be loaded the default bitmap font is used instead.
        canvas = Image.new('RGB', (800, 120), color='white')
        try:
            from PIL import ImageFont
            typeface = ImageFont.truetype(
                '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 36,
            )
        except Exception:
            typeface = None
        ImageDraw.Draw(canvas).text(
            (20, 30), "INVOICE 12345 Total $100", fill='black', font=typeface,
        )

        stream = io.BytesIO()
        canvas.save(stream, format='PNG')

        ocr = TesseractAdapter().extract(stream.getvalue(), mimetype='image/png')

        self.assertEqual(ocr.backend, 'tesseract')
        self.assertEqual(ocr.error, '')
        self.assertEqual(ocr.pages, 1)
        self.assertGreater(len(ocr.raw_text), 0)
        # Tesseract should pick up the digits at minimum; spaces are
        # removed first so a split digit run still matches.
        compact_text = ocr.raw_text.replace(' ', '')
        self.assertIn('12345', compact_text)
|
||||||
45
fusion_accounting_ocr/views/account_move_views.xml
Normal file
45
fusion_accounting_ocr/views/account_move_views.xml
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<odoo>
|
||||||
|
|
||||||
|
<!-- Extends the invoice form (account.view_move_form) with the Fusion OCR
     trigger buttons in the header and a read-only OCR result panel in the
     sheet. Everything is limited to move types in_invoice / in_refund. -->
<record id="view_move_form_inherit_fusion_ocr" model="ir.ui.view">
    <field name="name">account.move.form.inherit.fusion_ocr</field>
    <field name="model">account.move</field>
    <field name="inherit_id" ref="account.view_move_form"/>
    <field name="arch" type="xml">

        <!-- Both buttons call action_request_ocr; at most one is shown.
             "Request OCR" is now also hidden in the 'failed' and 'manual'
             states, where "Re-run OCR" is displayed; previously both
             buttons appeared side by side in those two states. -->
        <xpath expr="//header" position="inside">
            <button name="action_request_ocr"
                    type="object"
                    string="Request OCR"
                    class="oe_highlight"
                    invisible="move_type not in ('in_invoice', 'in_refund') or ocr_state in ('processing', 'done', 'failed', 'manual')"/>
            <button name="action_request_ocr"
                    type="object"
                    string="Re-run OCR"
                    invisible="move_type not in ('in_invoice', 'in_refund') or ocr_state not in ('done', 'failed', 'manual')"/>
        </xpath>

        <!-- Result panel: hidden while ocr_state is 'not_requested'. -->
        <xpath expr="//sheet" position="inside">
            <group string="Fusion OCR"
                   invisible="move_type not in ('in_invoice', 'in_refund') or ocr_state == 'not_requested'">
                <group>
                    <field name="ocr_state" widget="badge"
                           decoration-success="ocr_state == 'done'"
                           decoration-info="ocr_state == 'processing'"
                           decoration-warning="ocr_state == 'manual'"
                           decoration-danger="ocr_state == 'failed'"/>
                    <field name="ocr_backend" readonly="1"/>
                    <field name="ocr_confidence" readonly="1" widget="percentage"/>
                </group>
                <group>
                    <field name="ocr_extracted_data" readonly="1" widget="text"/>
                </group>
                <field name="ocr_raw_text" readonly="1" nolabel="1"
                       placeholder="Raw OCR text..."/>
            </group>
        </xpath>

    </field>
</record>
|
||||||
|
|
||||||
|
</odoo>
|
||||||
35
fusion_accounting_ocr/views/res_config_settings_views.xml
Normal file
35
fusion_accounting_ocr/views/res_config_settings_views.xml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<odoo>
|
||||||
|
|
||||||
|
<!-- Adds a "Fusion Invoice OCR" block to the Accounting settings form,
     placed right after the block whose id is account_vendor_bills. -->
<record model="ir.ui.view" id="res_config_settings_view_form_inherit_fusion_ocr">
    <field name="name">res.config.settings.view.form.inherit.fusion_ocr</field>
    <field name="model">res.config.settings</field>
    <field name="inherit_id" ref="account.res_config_settings_view_form"/>
    <field name="arch" type="xml">

        <!-- The backend selector and the auto-run toggle are only shown
             while fusion_ocr_enabled is set. -->
        <xpath position="after" expr="//block[@id='account_vendor_bills']">
            <block id="fusion_ocr_section" title="Fusion Invoice OCR">
                <setting id="fusion_ocr_enabled_setting"
                         string="Enable Invoice OCR"
                         help="OCR vendor bill attachments via the configured backend.">
                    <field name="fusion_ocr_enabled"/>
                    <div class="content-group" invisible="not fusion_ocr_enabled">
                        <div class="mt16">
                            <label for="fusion_ocr_default_backend"
                                   class="o_light_label"
                                   string="Default OCR Backend"/>
                            <field name="fusion_ocr_default_backend"/>
                        </div>
                        <div class="mt16">
                            <field name="fusion_ocr_auto_run"/>
                            <label for="fusion_ocr_auto_run"
                                   string="Auto-run OCR on attachment"/>
                        </div>
                    </div>
                </setting>
            </block>
        </xpath>

    </field>
</record>
|
||||||
|
|
||||||
|
</odoo>
|
||||||
Reference in New Issue
Block a user