changes
This commit is contained in:
8
fusion_digitize/models/__init__.py
Normal file
8
fusion_digitize/models/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2026 Nexa Systems Inc.
|
||||
# License OPL-1 (Odoo Proprietary License v1.0)
|
||||
|
||||
from . import extract_service
|
||||
from . import account_move
|
||||
from . import account_bank_statement
|
||||
from . import res_config_settings
|
||||
41
fusion_digitize/models/account_bank_statement.py
Normal file
41
fusion_digitize/models/account_bank_statement.py
Normal file
@@ -0,0 +1,41 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2026 Nexa Systems Inc.
|
||||
# License OPL-1 (Odoo Proprietary License v1.0)
|
||||
|
||||
import logging
|
||||
|
||||
from odoo import models
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AccountBankStatement(models.Model):
    """Route bank statement digitization requests through Fusion Digitize.

    The parent implementation is used whenever no API key is available or
    the bank statement toggle does not read ``'True'``.
    """

    _inherit = 'account.bank.statement'

    def _contact_iap_extract(self, pathinfo, params):
        """Answer an extraction request locally when Fusion Digitize is active.

        :param pathinfo: requested action; ``'parse'``, ``'get_result'`` and
            ``'validate'`` are handled here, anything else goes to the parent
        :param params: request payload, forwarded untouched
        :return: the response dict of the local service or of the parent
        """
        digitizer = self.env['fusion.digitize.service']
        config = self.env['ir.config_parameter'].sudo()

        # No key means the local service cannot work: defer to the parent.
        if not digitizer._get_api_key():
            return super()._contact_iap_extract(pathinfo, params)

        # get_param returns the 'True' default when the parameter is absent.
        toggle = config.get_param(
            'fusion_digitize.enable_bank_statements', 'True',
        )
        if toggle != 'True':
            return super()._contact_iap_extract(pathinfo, params)

        if pathinfo == 'validate':
            # Nothing to report back for a local extraction.
            return {'status': 'success'}
        if pathinfo == 'get_result':
            return digitizer._handle_get_result(params)
        if pathinfo != 'parse':
            return super()._contact_iap_extract(pathinfo, params)

        _logger.info(
            "Fusion Digitize: intercepting bank statement parse request",
        )
        return digitizer._handle_parse(params, 'bank_statement')
|
||||
44
fusion_digitize/models/account_move.py
Normal file
44
fusion_digitize/models/account_move.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2026 Nexa Systems Inc.
|
||||
# License OPL-1 (Odoo Proprietary License v1.0)
|
||||
|
||||
import logging
|
||||
|
||||
from odoo import api, models
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AccountMove(models.Model):
    """Route invoice/bill digitization requests through Fusion Digitize.

    The parent implementation is used whenever no API key is available or
    both invoice toggles are off.
    """

    _inherit = 'account.move'

    @api.model
    def _contact_iap_extract(self, pathinfo, params):
        """Answer an extraction request locally when Fusion Digitize is active.

        :param pathinfo: requested action; ``'parse'``, ``'get_result'`` and
            ``'validate'`` are handled here, anything else goes to the parent
        :param params: request payload, forwarded untouched
        :return: the response dict of the local service or of the parent
        """
        digitizer = self.env['fusion.digitize.service']
        config = self.env['ir.config_parameter'].sudo()

        # No key means the local service cannot work: defer to the parent.
        if not digitizer._get_api_key():
            return super()._contact_iap_extract(pathinfo, params)

        # get_param returns the 'True' default when a parameter is absent.
        bills_on = config.get_param(
            'fusion_digitize.enable_vendor_bills', 'True',
        ) == 'True'
        invoices_on = config.get_param(
            'fusion_digitize.enable_customer_invoices', 'True',
        ) == 'True'

        # NOTE(review): the two toggles are only OR-ed together here; the
        # type of the move being digitized is not consulted -- confirm that
        # this is the intended granularity.
        if not (bills_on or invoices_on):
            return super()._contact_iap_extract(pathinfo, params)

        if pathinfo == 'validate':
            # Nothing to report back for a local extraction.
            return {'status': 'success'}
        if pathinfo == 'get_result':
            return digitizer._handle_get_result(params)
        if pathinfo != 'parse':
            return super()._contact_iap_extract(pathinfo, params)

        _logger.info("Fusion Digitize: intercepting invoice parse request")
        return digitizer._handle_parse(params, 'invoice')
|
||||
521
fusion_digitize/models/extract_service.py
Normal file
521
fusion_digitize/models/extract_service.py
Normal file
@@ -0,0 +1,521 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2026 Nexa Systems Inc.
|
||||
# License OPL-1 (Odoo Proprietary License v1.0)
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from odoo import models
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
INVOICE_EXTRACTION_PROMPT = """You are an accounts payable data extraction expert. Extract ALL fields from the provided invoice/bill document with perfect accuracy.
|
||||
|
||||
IMPORTANT: The text may come from PDF text extraction where TABLE COLUMNS are jumbled or separated. Carefully reconstruct the table by matching:
|
||||
- Item numbers / descriptions appear first
|
||||
- Quantities and units follow each item
|
||||
- Unit prices may appear near their item OR grouped separately
|
||||
- The AMOUNT column (rightmost) shows the line total (qty * unit_price). Use it to verify or compute unit_price = amount / quantity
|
||||
- Discount columns (often all zeros) may appear between unit_price and amount
|
||||
- If an item number appears more than once, each occurrence is a SEPARATE line item
|
||||
|
||||
CRITICAL: Extract EVERY individual line item. Do NOT merge or skip any. If the same product appears twice, include both.
|
||||
|
||||
Return ONLY valid JSON with this exact structure (use null for missing values):
|
||||
{
|
||||
"supplier": "Vendor/supplier company name that issued the bill",
|
||||
"client": "Client/customer/buyer company name (the company being billed)",
|
||||
"total": 0.00,
|
||||
"subtotal": 0.00,
|
||||
"total_tax_amount": 0.00,
|
||||
"invoice_id": "Invoice number, bill number, or reference number",
|
||||
"date": "YYYY-MM-DD",
|
||||
"due_date": "YYYY-MM-DD",
|
||||
"currency": "Three-letter currency code (e.g. CAD, USD, EUR)",
|
||||
"vat_number": "VAT or tax registration number of supplier",
|
||||
"payment_ref": "Payment reference or structured communication",
|
||||
"iban": "Bank account number or IBAN",
|
||||
"country": "Two-letter country code of supplier (e.g. CA, US)",
|
||||
"invoice_lines": [
|
||||
{
|
||||
"description": "Full product/service description for this line",
|
||||
"quantity": 1.0,
|
||||
"unit_price": 0.00,
|
||||
"taxes": [13.0],
|
||||
"subtotal": 0.00,
|
||||
"total": 0.00
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Extract EVERY line item. Count them carefully. If the document lists 14 products, you must return 14 entries.
|
||||
- "subtotal" per line = quantity * unit_price (the line amount before tax)
|
||||
- "total" per line = subtotal + tax for that line. If no per-line tax, set total = subtotal.
|
||||
- The top-level "subtotal" is the document's subtotal (sum of all line amounts before tax)
|
||||
- The top-level "total" is the final amount due including all taxes and charges
|
||||
- For taxes: check the tax summary section. If items have tax code "00" or 0%, use an empty list []. Only include the tax percentage if that line is actually taxed (e.g. [13] for HST 13%).
|
||||
- If the Amount column value differs from qty * unit_price, TRUST the Amount column and compute unit_price = amount / quantity
|
||||
- If you see "Total charges", "Freight", or "Handling" as a separate charge, include it as its own line
|
||||
- Dates must be in YYYY-MM-DD format
|
||||
- Do NOT include any text outside the JSON object"""
|
||||
|
||||
BANK_STATEMENT_EXTRACTION_PROMPT = """You are a document data extraction assistant. Extract bank statement fields from the provided document content.
|
||||
|
||||
Return ONLY valid JSON with this exact structure (use null for missing values):
|
||||
{
|
||||
"balance_start": 0.00,
|
||||
"balance_end": 0.00,
|
||||
"date": "YYYY-MM-DD",
|
||||
"lines": [
|
||||
{
|
||||
"date": "YYYY-MM-DD",
|
||||
"description": "Transaction description",
|
||||
"amount": 0.00
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Positive amounts for deposits/credits, negative for withdrawals/debits
|
||||
- Dates must be in YYYY-MM-DD format
|
||||
- Do NOT include any text outside the JSON object"""
|
||||
|
||||
# Extracted text is only considered usable when it holds strictly more than
# this many non-whitespace characters (see ``_text_is_useful``).
MIN_USEFUL_TEXT_LENGTH = 50
|
||||
|
||||
|
||||
class FusionDigitizeService(models.AbstractModel):
|
||||
_name = 'fusion.digitize.service'
|
||||
_description = 'Fusion Digitize Extraction Service'
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Configuration helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _get_api_key(self):
|
||||
ICP = self.env['ir.config_parameter'].sudo()
|
||||
key = ICP.get_param('fusion_digitize.openai_api_key', '')
|
||||
if key:
|
||||
return key
|
||||
return ICP.get_param('fusion_accounts.openai_api_key', '')
|
||||
|
||||
def _get_ai_model(self):
|
||||
return self.env['ir.config_parameter'].sudo().get_param(
|
||||
'fusion_digitize.ai_model', 'gpt-4o-mini',
|
||||
)
|
||||
|
||||
def _is_tesseract_enabled(self):
|
||||
return self.env['ir.config_parameter'].sudo().get_param(
|
||||
'fusion_digitize.enable_tesseract', 'True',
|
||||
) == 'True'
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tier 1: pdfminer text extraction (FREE, instant)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _extract_text_pdfminer(self, pdf_bytes):
    """Pull the embedded text layer out of a PDF with pdfminer.

    :param pdf_bytes: raw PDF content
    :return: the stripped text, or ``''`` when pdfminer is missing, fails,
        or finds nothing
    """
    try:
        from io import BytesIO
        from pdfminer.high_level import extract_text

        raw_text = extract_text(BytesIO(pdf_bytes))
        if not raw_text:
            return ''
        stripped = raw_text.strip()
        _logger.info("pdfminer extracted %d chars", len(stripped))
        return stripped
    except ImportError:
        _logger.warning("pdfminer not available")
    except Exception as exc:
        # Best effort: any parsing problem just means "no text from this tier".
        _logger.warning("pdfminer extraction failed: %s", exc)
    return ''
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tier 2: Tesseract OCR (FREE, ~2-5s)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _extract_text_tesseract(self, pdf_bytes, max_pages=5):
    """OCR the leading pages of a PDF with Tesseract.

    :param pdf_bytes: raw PDF content
    :param max_pages: number of leading pages to OCR (``None`` for all)
    :return: the page texts joined with newlines and stripped, or ``''``
        when the tier is disabled, a dependency is missing, or OCR fails
    """
    if not self._is_tesseract_enabled():
        return ''
    try:
        from pdf2image import convert_from_bytes
        import pytesseract

        # Rasterise only the pages that will actually be OCR'd: rendering a
        # whole long PDF at 300 dpi just to discard most pages wastes time
        # and memory.
        page_limit = max_pages if (max_pages and max_pages > 0) else None
        images = convert_from_bytes(pdf_bytes, dpi=300, last_page=page_limit)
        texts = [
            pytesseract.image_to_string(img) for img in images[:max_pages]
        ]
        result = '\n'.join(texts).strip()
        if result:
            _logger.info("Tesseract extracted %d chars", len(result))
        return result
    except ImportError:
        _logger.warning("pytesseract or pdf2image not available")
        return ''
    except Exception as exc:
        # Best effort: any OCR problem just means "no text from this tier".
        _logger.warning("Tesseract extraction failed: %s", exc)
        return ''
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tier 3: OpenAI Vision (PAID, last resort)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _pdf_to_base64_images(self, pdf_bytes, max_pages=3):
    """Render the leading pages of a PDF as base64-encoded PNG strings.

    :param pdf_bytes: raw PDF content
    :param max_pages: number of leading pages to render
    :return: list of base64 strings; holds whatever was converted before a
        failure, and is empty when pdf2image is missing
    """
    from io import BytesIO

    images = []
    try:
        from pdf2image import convert_from_bytes

        # Rasterise only the pages that will be sent; rendering every page
        # of a long PDF just to drop most of them wastes time and memory.
        page_limit = max_pages if (max_pages and max_pages > 0) else None
        pil_images = convert_from_bytes(
            pdf_bytes, dpi=200, last_page=page_limit,
        )
        for img in pil_images[:max_pages]:
            buf = BytesIO()
            img.save(buf, format='PNG')
            images.append(base64.b64encode(buf.getvalue()).decode())
    except ImportError:
        _logger.warning("pdf2image not available for vision fallback")
    except Exception as exc:
        _logger.warning("PDF to image conversion failed: %s", exc)
    return images
|
||||
|
||||
def _extract_via_vision(self, pdf_bytes, doc_type):
    """Extract structured data by sending page images to OpenAI.

    :param pdf_bytes: raw PDF content
    :param doc_type: ``'invoice'`` selects the invoice prompt; any other
        value selects the bank statement prompt
    :return: whatever ``_call_openai`` returns, or ``{}`` when there is no
        API key or no page image could be produced
    """
    api_key = self._get_api_key()
    if not api_key:
        return {}

    page_images = self._pdf_to_base64_images(pdf_bytes)
    if not page_images:
        return {}

    if doc_type == 'invoice':
        system_prompt = INVOICE_EXTRACTION_PROMPT
    else:
        system_prompt = BANK_STATEMENT_EXTRACTION_PROMPT

    user_content = [
        {"type": "text", "text": "Extract data from these document images:"},
    ]
    user_content.extend(
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/png;base64,{encoded}",
                "detail": "high",
            },
        }
        for encoded in page_images
    )

    # The mini model is swapped for gpt-4o on the image path.
    configured = self._get_ai_model()
    vision_model = 'gpt-4o' if configured == 'gpt-4o-mini' else configured

    return self._call_openai(api_key, vision_model, system_prompt, user_content)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Orchestration: 3-tier text extraction
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _extract_text(self, pdf_bytes):
|
||||
text = self._extract_text_pdfminer(pdf_bytes)
|
||||
if self._text_is_useful(text):
|
||||
return text, 'pdfminer'
|
||||
|
||||
text = self._extract_text_tesseract(pdf_bytes)
|
||||
if self._text_is_useful(text):
|
||||
return text, 'tesseract'
|
||||
|
||||
return '', 'none'
|
||||
|
||||
@staticmethod
def _text_is_useful(text):
    """Tell whether extracted text is long enough to be worth mapping.

    Whitespace is ignored; the remaining length must be strictly greater
    than ``MIN_USEFUL_TEXT_LENGTH``.
    """
    if text:
        non_blank = ''.join(text.split())
        return len(non_blank) > MIN_USEFUL_TEXT_LENGTH
    return False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# AI field mapping (text -> structured data)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _map_fields_from_text(self, text, doc_type):
    """Ask OpenAI to turn extracted document text into structured fields.

    :param text: document text; only the first 12000 characters are sent
    :param doc_type: ``'invoice'`` selects the invoice prompt; any other
        value selects the bank statement prompt
    :return: whatever ``_call_openai`` returns, or ``{}`` without an API key
    """
    api_key = self._get_api_key()
    if not api_key:
        _logger.warning("No OpenAI API key configured for Fusion Digitize")
        return {}

    if doc_type == 'invoice':
        system_prompt = INVOICE_EXTRACTION_PROMPT
    else:
        system_prompt = BANK_STATEMENT_EXTRACTION_PROMPT

    excerpt = text[:12000]
    user_content = [{"type": "text", "text": f"DOCUMENT TEXT:\n{excerpt}"}]
    return self._call_openai(
        api_key, self._get_ai_model(), system_prompt, user_content,
    )
|
||||
|
||||
def _call_openai(self, api_key, model, system_prompt, content_parts):
    """Send one chat-completion request to OpenAI and decode the JSON reply.

    :param api_key: bearer token placed in the ``Authorization`` header
    :param model: model name placed in the request body
    :param system_prompt: text of the system message
    :param content_parts: value used as the content of the user message
    :return: the decoded JSON object as a dict, or ``{}`` when ``requests``
        is missing, the call fails, or the reply is not a JSON object
    """
    try:
        import requests
    except ImportError:
        _logger.error("requests library not available")
        return {}

    def _loggable_number(value):
        # Diagnostics must never break extraction: the model sometimes
        # returns numbers as strings or null.
        try:
            return float(value or 0)
        except (TypeError, ValueError):
            return 0.0

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": content_parts},
    ]

    try:
        resp = requests.post(
            'https://api.openai.com/v1/chat/completions',
            headers={
                'Authorization': f'Bearer {api_key}',
                'Content-Type': 'application/json',
            },
            json={
                'model': model,
                'messages': messages,
                'max_tokens': 4000,
                'temperature': 0.1,
            },
            timeout=90,
        )
        resp.raise_for_status()
        body = resp.json()
        content = (body['choices'][0]['message']['content'] or '').strip()

        # Drop a surrounding Markdown code fence (``` or ```json) if present.
        if content.startswith('```'):
            lines = content.split('\n')
            content = '\n'.join(
                lines[1:-1] if lines[-1].strip() == '```' else lines[1:]
            ).strip()

        parsed = json.loads(content)
    except Exception as exc:
        _logger.error("OpenAI extraction call failed: %s", exc)
        return {}

    # Callers use dict methods on the result; a JSON array or scalar is a
    # failed extraction, reported as such instead of as an AttributeError.
    if not isinstance(parsed, dict):
        _logger.error(
            "OpenAI extraction call failed: expected a JSON object, got %s",
            type(parsed).__name__,
        )
        return {}

    doc_lines = parsed.get('invoice_lines') or parsed.get('lines') or []
    if not isinstance(doc_lines, list):
        doc_lines = []

    _logger.info(
        "OpenAI response (model=%s, lines=%d): supplier=%s, "
        "subtotal=%s, tax=%s, total=%s",
        model, len(doc_lines),
        parsed.get('supplier', ''),
        parsed.get('subtotal', parsed.get('balance_start', '')),
        parsed.get('total_tax_amount', ''),
        parsed.get('total', parsed.get('balance_end', '')),
    )
    for i, line in enumerate(doc_lines, start=1):
        if not isinstance(line, dict):
            # Malformed entries are skipped in the log only; the parsed
            # result is returned untouched.
            continue
        _logger.info(
            " Line %d: %s | qty=%.2f | unit=%.2f | sub=%.2f | tax=%s",
            i,
            str(line.get('description') or '')[:50],
            _loggable_number(line.get('quantity', line.get('amount', 0))),
            _loggable_number(line.get('unit_price', 0)),
            _loggable_number(line.get('subtotal', line.get('amount', 0))),
            line.get('taxes', ''),
        )
    return parsed
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Validation
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _validate_invoice(mapped):
|
||||
doc_subtotal = mapped.get('subtotal') or 0.0
|
||||
if not doc_subtotal:
|
||||
return True
|
||||
|
||||
lines = mapped.get('invoice_lines') or []
|
||||
if not lines:
|
||||
return False
|
||||
|
||||
line_sum = sum(
|
||||
(l.get('subtotal') or l.get('unit_price', 0) * l.get('quantity', 1))
|
||||
for l in lines
|
||||
)
|
||||
|
||||
if line_sum == 0:
|
||||
return False
|
||||
|
||||
tolerance = 0.10
|
||||
ratio = abs(line_sum - doc_subtotal) / doc_subtotal
|
||||
return ratio <= tolerance
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Full extraction pipeline
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _process_document(self, pdf_bytes, doc_type):
    """Run the whole extraction pipeline for one document.

    Text is extracted first and mapped by the AI; an invoice mapping that
    fails validation is retried through Vision, and Vision is also used
    when no mapping was obtained at all.

    :param pdf_bytes: raw PDF content
    :param doc_type: ``'invoice'`` or any other value for a bank statement
    :return: the result built by the matching ``_build_*_result`` method,
        or ``{}`` when nothing could be extracted
    """
    is_invoice = doc_type == 'invoice'
    text, source = self._extract_text(pdf_bytes)
    extracted = None

    if text:
        _logger.info("Fusion Digitize: using %s text for AI mapping", source)
        extracted = self._map_fields_from_text(text, doc_type)

        if extracted and is_invoice and not self._validate_invoice(extracted):
            summed = 0
            for entry in extracted.get('invoice_lines') or []:
                summed += entry.get('subtotal') or 0
            _logger.info(
                "Fusion Digitize: text extraction failed validation "
                "(line_sum=%.2f vs subtotal=%.2f). Retrying with Vision.",
                summed, extracted.get('subtotal', 0),
            )
            # Keep the text-based mapping when Vision yields nothing.
            retried = self._extract_via_vision(pdf_bytes, doc_type)
            extracted = retried or extracted

    if not extracted:
        _logger.info("Fusion Digitize: using OpenAI Vision (no text available)")
        text = ''
        extracted = self._extract_via_vision(pdf_bytes, doc_type)
        if not extracted:
            _logger.warning("Fusion Digitize: extraction returned no data")
            return {}

    line_total = len(
        extracted.get('invoice_lines') or extracted.get('lines') or []
    )
    _logger.info(
        "Fusion Digitize: final result - %d line(s), total=%s, subtotal=%s",
        line_total,
        extracted.get('total'), extracted.get('subtotal'),
    )

    if is_invoice:
        return self._build_invoice_result(extracted, text)
    return self._build_bank_statement_result(extracted, text)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Response mappers: AI output -> Odoo's expected OCR format
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _ocr_field(value, candidates=None):
|
||||
return {
|
||||
'selected_value': {'content': value},
|
||||
'candidates': candidates or [],
|
||||
}
|
||||
|
||||
def _build_invoice_result(self, data, full_text=''):
|
||||
date_str = data.get('date') or ''
|
||||
if date_str and ' ' not in date_str:
|
||||
date_str += ' 00:00:00'
|
||||
|
||||
due_date_str = data.get('due_date') or ''
|
||||
if due_date_str and ' ' not in due_date_str:
|
||||
due_date_str += ' 00:00:00'
|
||||
|
||||
swift_json = json.dumps(data.get('swift_code') or {})
|
||||
|
||||
lines = []
|
||||
for line in (data.get('invoice_lines') or []):
|
||||
lines.append({
|
||||
'description': line.get('description', '/'),
|
||||
'unit_price': line.get('unit_price', 0.0),
|
||||
'quantity': line.get('quantity', 1.0),
|
||||
'taxes': line.get('taxes', []),
|
||||
'subtotal': line.get('subtotal', line.get('unit_price', 0.0)),
|
||||
'total': line.get('total', line.get('subtotal', 0.0)),
|
||||
})
|
||||
|
||||
if not lines:
|
||||
subtotal = data.get('subtotal') or data.get('total') or 0.0
|
||||
lines.append({
|
||||
'description': 'Extracted total',
|
||||
'unit_price': subtotal,
|
||||
'quantity': 1.0,
|
||||
'taxes': [],
|
||||
'subtotal': subtotal,
|
||||
'total': data.get('total') or subtotal,
|
||||
})
|
||||
|
||||
result = {
|
||||
'supplier': self._ocr_field(data.get('supplier') or ''),
|
||||
'client': self._ocr_field(data.get('client') or ''),
|
||||
'total': self._ocr_field(data.get('total') or 0.0),
|
||||
'subtotal': self._ocr_field(data.get('subtotal') or 0.0),
|
||||
'total_tax_amount': self._ocr_field(data.get('total_tax_amount') or 0.0),
|
||||
'invoice_id': self._ocr_field(data.get('invoice_id') or ''),
|
||||
'date': self._ocr_field(date_str),
|
||||
'due_date': self._ocr_field(due_date_str),
|
||||
'currency': self._ocr_field(data.get('currency') or ''),
|
||||
'VAT_Number': self._ocr_field(data.get('vat_number') or ''),
|
||||
'payment_ref': self._ocr_field(data.get('payment_ref') or ''),
|
||||
'iban': self._ocr_field(data.get('iban') or ''),
|
||||
'SWIFT_code': self._ocr_field(swift_json),
|
||||
'country': self._ocr_field(data.get('country') or ''),
|
||||
'invoice_lines': lines,
|
||||
}
|
||||
|
||||
if full_text:
|
||||
result['full_text_annotation'] = full_text
|
||||
|
||||
return result
|
||||
|
||||
def _build_bank_statement_result(self, data, full_text=''):
|
||||
date_str = data.get('date') or ''
|
||||
if date_str and ' ' not in date_str:
|
||||
date_str += ' 00:00:00'
|
||||
|
||||
lines = []
|
||||
for line in (data.get('lines') or []):
|
||||
lines.append({
|
||||
'amount': line.get('amount', 0.0),
|
||||
'date': line.get('date', ''),
|
||||
'description': line.get('description', ''),
|
||||
})
|
||||
|
||||
result = {
|
||||
'balance_start': self._ocr_field(data.get('balance_start') or 0.0),
|
||||
'balance_end': self._ocr_field(data.get('balance_end') or 0.0),
|
||||
'date': self._ocr_field(date_str),
|
||||
'bank_statement_lines': lines,
|
||||
}
|
||||
|
||||
if full_text:
|
||||
result['full_text_annotation'] = full_text
|
||||
|
||||
return result
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Parse / Get Result handlers (called from model overrides)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _handle_parse(self, params, doc_type):
    """Process the first submitted document and park its result under a token.

    :param params: request payload; ``documents`` holds base64 documents, of
        which only the first is processed
    :param doc_type: document type forwarded to ``_process_document``
    :return: ``{'status': 'success', 'document_token': ...}`` or an
        ``error_internal`` status dict
    """
    submitted = params.get('documents', [])
    if not submitted:
        return {'status': 'error_internal', 'error_message': 'No documents provided'}

    try:
        raw_pdf = base64.b64decode(submitted[0])
    except Exception as exc:
        _logger.error("Failed to decode document: %s", exc)
        return {'status': 'error_internal'}

    extraction = self._process_document(raw_pdf, doc_type)
    if not extraction:
        return {'status': 'error_internal'}

    # The result is stored as JSON in a config parameter keyed by a fresh
    # token; ``_handle_get_result`` reads it back with that token.
    token = str(uuid.uuid4())
    config = self.env['ir.config_parameter'].sudo()
    config.set_param(
        f'fusion_digitize.result.{token}',
        json.dumps(extraction),
    )

    _logger.info(
        "Fusion Digitize: stored extraction results for token %s", token,
    )
    return {'status': 'success', 'document_token': token}
|
||||
|
||||
def _handle_get_result(self, params):
    """Return the extraction result previously stored for a document token.

    :param params: request payload carrying ``document_token``
    :return: ``{'status': 'success', 'results': [...]}`` or an
        ``error_internal`` status dict when nothing usable is stored
    """
    token = params.get('document_token', '')
    config = self.env['ir.config_parameter'].sudo()
    storage_key = f'fusion_digitize.result.{token}'

    payload = config.get_param(storage_key, '')
    if not payload:
        _logger.warning("No stored result for token %s", token)
        return {'status': 'error_internal'}

    try:
        extraction = json.loads(payload)
    except (json.JSONDecodeError, TypeError):
        _logger.error("Corrupt stored result for token %s", token)
        return {'status': 'error_internal'}

    # One-shot read: the stored entry is reset once it has been decoded.
    # NOTE(review): presumably set_param(key, False) deletes the parameter --
    # confirm against ir.config_parameter.
    config.set_param(storage_key, False)

    return {'status': 'success', 'results': [extraction]}
|
||||
92
fusion_digitize/models/res_config_settings.py
Normal file
92
fusion_digitize/models/res_config_settings.py
Normal file
@@ -0,0 +1,92 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2026 Nexa Systems Inc.
|
||||
# License OPL-1 (Odoo Proprietary License v1.0)
|
||||
|
||||
import logging
|
||||
|
||||
from odoo import models, fields
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ResConfigSettings(models.TransientModel):
    """Settings fields for Fusion Digitize."""

    _inherit = 'res.config.settings'

    # Boolean toggles that the rest of the module reads with
    # ``get_param(key, 'True') == 'True'``, i.e. ON unless explicitly off.
    _FD_DEFAULT_ON_TOGGLES = (
        ('x_fd_enable_tesseract', 'fusion_digitize.enable_tesseract'),
        ('x_fd_enable_vendor_bills', 'fusion_digitize.enable_vendor_bills'),
        ('x_fd_enable_customer_invoices', 'fusion_digitize.enable_customer_invoices'),
        ('x_fd_enable_bank_statements', 'fusion_digitize.enable_bank_statements'),
    )

    # ------------------------------------------------------------------
    # OpenAI configuration
    # ------------------------------------------------------------------
    x_fd_openai_api_key = fields.Char(
        string='OpenAI API Key',
        config_parameter='fusion_digitize.openai_api_key',
        help='Dedicated API key for Fusion Digitize. Leave blank to use '
             'the key from Fusion Accounts (if installed).',
    )
    x_fd_ai_model = fields.Selection(
        selection=[
            ('gpt-4o-mini', 'GPT-4o Mini (Fast, Low Cost)'),
            ('gpt-4o', 'GPT-4o (Best Quality)'),
        ],
        string='AI Model',
        config_parameter='fusion_digitize.ai_model',
        help='Model for text-based field mapping. GPT-4o is also used '
             'for Vision fallback on scanned documents.',
    )

    # ------------------------------------------------------------------
    # Extraction settings
    # ------------------------------------------------------------------
    x_fd_enable_tesseract = fields.Boolean(
        string='Enable Tesseract OCR',
        config_parameter='fusion_digitize.enable_tesseract',
        help='Use local OCR for scanned PDFs before sending to AI Vision. '
             'Disable if OCR quality is poor for your documents.',
    )

    # ------------------------------------------------------------------
    # Document type toggles
    # ------------------------------------------------------------------
    x_fd_enable_vendor_bills = fields.Boolean(
        string='Enable for Vendor Bills',
        config_parameter='fusion_digitize.enable_vendor_bills',
        help='Intercept digitization for incoming vendor bills.',
    )
    x_fd_enable_customer_invoices = fields.Boolean(
        string='Enable for Customer Invoices',
        config_parameter='fusion_digitize.enable_customer_invoices',
        help='Intercept digitization for outgoing customer invoices.',
    )
    x_fd_enable_bank_statements = fields.Boolean(
        string='Enable for Bank Statements',
        config_parameter='fusion_digitize.enable_bank_statements',
        help='Intercept digitization for bank statement imports.',
    )

    # ------------------------------------------------------------------
    # Computed: fusion_accounts integration status
    # ------------------------------------------------------------------
    x_fd_fusion_accounts_installed = fields.Boolean(
        string='Fusion Accounts Installed',
        compute='_compute_fd_fusion_accounts_info',
    )
    x_fd_api_key_source = fields.Char(
        string='API Key Source',
        compute='_compute_fd_fusion_accounts_info',
    )

    def get_values(self):
        """Show the default-on toggles as checked unless explicitly disabled.

        The stock handling of a boolean ``config_parameter`` field treats a
        missing parameter as unchecked, while the readers of these
        parameters treat a missing parameter as enabled. Reading them here
        with the same ``'True'`` default keeps the form and the runtime
        behaviour in agreement.
        """
        res = super().get_values()
        ICP = self.env['ir.config_parameter'].sudo()
        for field_name, key in self._FD_DEFAULT_ON_TOGGLES:
            res[field_name] = ICP.get_param(key, 'True') == 'True'
        return res

    def set_values(self):
        """Persist the default-on toggles as explicit 'True'/'False' strings.

        Saving an unchecked boolean through the stock ``config_parameter``
        handling removes the parameter, after which the readers fall back
        to their ``'True'`` default and the toggle can never be switched
        off. Writing the literal string ``'False'`` makes "disabled"
        distinguishable from "never configured".
        """
        super().set_values()
        ICP = self.env['ir.config_parameter'].sudo()
        for field_name, key in self._FD_DEFAULT_ON_TOGGLES:
            ICP.set_param(key, 'True' if self[field_name] else 'False')

    def _compute_fd_fusion_accounts_info(self):
        """Fill the Fusion Accounts status and the API key source label."""
        ICP = self.env['ir.config_parameter'].sudo()
        fa_installed = bool(self.env['ir.module.module'].search([
            ('name', '=', 'fusion_accounts'),
            ('state', '=', 'installed'),
        ], limit=1))
        custom_key = ICP.get_param('fusion_digitize.openai_api_key', '')
        fa_key = ICP.get_param('fusion_accounts.openai_api_key', '')

        if custom_key:
            key_source = 'Using dedicated Fusion Digitize key'
        elif fa_key:
            # The service's key lookup falls back to this parameter without
            # checking the module state, so the label must not require
            # ``fa_installed`` either.
            key_source = 'Using API key from Fusion Accounts'
        else:
            key_source = 'No API key configured'

        for rec in self:
            rec.x_fd_fusion_accounts_installed = fa_installed
            rec.x_fd_api_key_source = key_source
|
||||
Reference in New Issue
Block a user