import io

from PIL import Image, ImageDraw

from odoo.tests import tagged
from odoo.tests.common import TransactionCase

from odoo.addons.fusion_accounting_ocr.services.ocr_providers.tesseract_adapter import (
    TesseractAdapter,
)


@tagged('post_install', '-at_install')
class TestTesseractAdapter(TransactionCase):
    """Exercise ``TesseractAdapter`` against a synthetic, in-memory image."""

    @staticmethod
    def _render_text_png(text):
        """Return PNG bytes of *text* drawn in black on a white 800x120 canvas.

        A bold DejaVu TTF at size 36 is preferred for tesseract reliability;
        if loading it fails for any reason, ``font=None`` is passed so Pillow
        falls back to its default bitmap font.
        """
        canvas = Image.new('RGB', (800, 120), color='white')
        pen = ImageDraw.Draw(canvas)

        # Best-effort font lookup: the import and the load are both allowed
        # to fail without failing the test.
        try:
            from PIL import ImageFont
            typeface = ImageFont.truetype(
                '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf',
                36,
            )
        except Exception:
            typeface = None

        pen.text((20, 30), text, fill='black', font=typeface)

        with io.BytesIO() as stream:
            canvas.save(stream, format='PNG')
            return stream.getvalue()

    def test_is_available(self):
        """The adapter must report itself as available."""
        # In our container tesseract + pytesseract + pdf2image are
        # pre-installed.
        available = TesseractAdapter.is_available()
        self.assertTrue(available)

    def test_extract_simple_text_image(self):
        """Run ``extract`` on a tiny generated PNG and check the result fields."""
        # Generate a tiny PNG with the text "INVOICE 12345 Total $100".
        image_bytes = self._render_text_png("INVOICE 12345 Total $100")

        ocr_result = TesseractAdapter().extract(image_bytes, mimetype='image/png')

        self.assertEqual(ocr_result.backend, 'tesseract')
        self.assertEqual(ocr_result.error, '')
        self.assertEqual(ocr_result.pages, 1)
        self.assertGreater(len(ocr_result.raw_text), 0)

        # Tesseract should pick up the digits at minimum; spaces are stripped
        # first so a split digit run still matches.
        compact_text = ocr_result.raw_text.replace(' ', '')
        self.assertIn('12345', compact_text)