Files
gsinghpal 9ebf89bde2 changes
2026-05-16 13:18:52 -04:00

48 lines
1.6 KiB
Python

import io
from PIL import Image, ImageDraw
from odoo.tests import tagged
from odoo.tests.common import TransactionCase
from odoo.addons.fusion_accounting_ocr.services.ocr_providers.tesseract_adapter import (
TesseractAdapter,
)
@tagged('post_install', '-at_install')
class TestTesseractAdapter(TransactionCase):
    """Checks for ``TesseractAdapter``: availability and PNG text extraction."""

    def test_is_available(self):
        """The adapter must report itself as available.

        The test container is expected to ship tesseract, pytesseract and
        pdf2image, so a falsy answer here is treated as a failure.
        """
        available = TesseractAdapter.is_available()
        self.assertTrue(available)

    def test_extract_simple_text_image(self):
        """Render a short text line to PNG, OCR it, and check the result."""
        # White 800x120 canvas: wide enough for the whole line at size 36.
        canvas = Image.new('RGB', (800, 120), color='white')
        pen = ImageDraw.Draw(canvas)

        # A bold TrueType face gives tesseract cleaner glyphs. Loading it is
        # best-effort: on any failure ``None`` is passed as the font so that
        # Pillow falls back to its default font.
        try:
            from PIL import ImageFont
            label_font = ImageFont.truetype(
                '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 36,
            )
        except Exception:
            label_font = None

        pen.text(
            (20, 30),
            "INVOICE 12345 Total $100",
            fill='black',
            font=label_font,
        )

        # Serialize the image in memory; the adapter is handed raw bytes.
        with io.BytesIO() as stream:
            canvas.save(stream, format='PNG')
            payload = stream.getvalue()

        ocr = TesseractAdapter().extract(payload, mimetype='image/png')

        self.assertEqual(ocr.backend, 'tesseract')
        self.assertEqual(ocr.error, '')
        self.assertEqual(ocr.pages, 1)
        self.assertGreater(len(ocr.raw_text), 0)

        # Only the digit run is required to survive OCR; spaces are removed
        # first so stray gaps inserted between digits do not break the match.
        compact_text = ocr.raw_text.replace(' ', '')
        self.assertIn('12345', compact_text)