# Odoo-Modules/fusion_accounting_ocr/tests/test_tesseract_adapter.py

import io

from PIL import Image, ImageDraw

from odoo.tests import tagged
from odoo.tests.common import TransactionCase

from odoo.addons.fusion_accounting_ocr.services.ocr_providers.tesseract_adapter import (
    TesseractAdapter,
)


@tagged('post_install', '-at_install')
class TestTesseractAdapter(TransactionCase):
    """Smoke tests for ``TesseractAdapter``.

    The tests check that the adapter reports itself as available and that
    it can extract text from a small PNG generated in memory.
    """

    @staticmethod
    def _render_text_png(text):
        """Return PNG bytes of *text* drawn in black on a white canvas.

        A TrueType font is used when it can be loaded, for tesseract
        reliability; otherwise ``font=None`` is passed to Pillow so it
        falls back to its default font.
        """
        # Use a slightly larger image than strictly needed so the glyphs
        # have some margin around them.
        canvas = Image.new('RGB', (800, 120), color='white')
        pen = ImageDraw.Draw(canvas)
        try:
            from PIL import ImageFont
            font = ImageFont.truetype(
                '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', 36,
            )
        except (OSError, ImportError):
            # OSError: the font file is missing or unreadable.
            # ImportError: Pillow was built without FreeType support.
            # Anything else is a real bug and should surface.
            font = None
        pen.text((20, 30), text, fill='black', font=font)

        with io.BytesIO() as stream:
            canvas.save(stream, format='PNG')
            return stream.getvalue()

    def test_is_available(self):
        """The adapter must report itself as available."""
        # In our container tesseract + pytesseract + pdf2image are pre-installed.
        self.assertTrue(
            TesseractAdapter.is_available(),
            "TesseractAdapter.is_available() returned a falsy value",
        )

    def test_extract_simple_text_image(self):
        """Extracting a one-line PNG yields a single page containing the digits."""
        # Generate a tiny PNG with the text "INVOICE 12345 Total $100".
        png_bytes = self._render_text_png("INVOICE 12345 Total $100")

        adapter = TesseractAdapter()
        result = adapter.extract(png_bytes, mimetype='image/png')

        self.assertEqual(result.backend, 'tesseract')
        self.assertEqual(result.error, '')
        self.assertEqual(result.pages, 1)
        self.assertGreater(
            len(result.raw_text), 0,
            "extract() returned empty raw_text",
        )
        # Tesseract should pick up the digits at minimum. Spaces are removed
        # first so a digit run split by stray spaces still matches.
        self.assertIn('12345', result.raw_text.replace(' ', ''))