feat(fusion_accounting_bank_rec): memo_tokenizer for Canadian bank memo formats
Made-with: Cursor
This commit is contained in:
@@ -0,0 +1 @@
|
|||||||
|
from . import memo_tokenizer
|
||||||
|
|||||||
44
fusion_accounting_bank_rec/services/memo_tokenizer.py
Normal file
44
fusion_accounting_bank_rec/services/memo_tokenizer.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
"""Extract searchable tokens from Canadian bank statement memos.
|
||||||
|
|
||||||
|
Handles common memo formats from RBC, TD, Scotia, BMO, plus generic
|
||||||
|
cheque-number and reference-number patterns. Output is normalized
|
||||||
|
(uppercase, alphanumeric) for case-insensitive matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
import unicodedata
|
||||||
|
|
||||||
|
REF_PATTERNS = [
|
||||||
|
(re.compile(r'\b(REF|REFERENCE)\s*#?\s*(\d+)\b', re.I), r'REF\2'),
|
||||||
|
(re.compile(r'\b(CHQ|CHEQUE|CHECK)\s*#?\s*(\d+)\b', re.I), r'CHEQUE\2'),
|
||||||
|
(re.compile(r'\b(INV|INVOICE)\s*#?\s*(\d+)\b', re.I), r'INV\2'),
|
||||||
|
]
|
||||||
|
|
||||||
|
MIN_TOKEN_LENGTH = 2
|
||||||
|
|
||||||
|
|
||||||
|
def tokenize_memo(memo: str | None) -> list[str]:
|
||||||
|
"""Return list of normalized tokens from a bank memo.
|
||||||
|
|
||||||
|
Empty/None input returns []. Order preserved (first occurrence wins
|
||||||
|
for de-duplication)."""
|
||||||
|
if not memo:
|
||||||
|
return []
|
||||||
|
|
||||||
|
text = memo.upper()
|
||||||
|
for pattern, replacement in REF_PATTERNS:
|
||||||
|
text = pattern.sub(replacement, text)
|
||||||
|
|
||||||
|
text = re.sub(r'[^A-Z0-9]+', ' ', text)
|
||||||
|
raw_tokens = text.split()
|
||||||
|
|
||||||
|
seen = set()
|
||||||
|
tokens = []
|
||||||
|
for tok in raw_tokens:
|
||||||
|
if len(tok) < MIN_TOKEN_LENGTH:
|
||||||
|
continue
|
||||||
|
if tok in seen:
|
||||||
|
continue
|
||||||
|
seen.add(tok)
|
||||||
|
tokens.append(tok)
|
||||||
|
|
||||||
|
return tokens
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
from . import test_memo_tokenizer
|
||||||
|
|||||||
42
fusion_accounting_bank_rec/tests/test_memo_tokenizer.py
Normal file
42
fusion_accounting_bank_rec/tests/test_memo_tokenizer.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
from odoo.tests.common import TransactionCase, tagged
|
||||||
|
from odoo.addons.fusion_accounting_bank_rec.services.memo_tokenizer import tokenize_memo
|
||||||
|
|
||||||
|
|
||||||
|
@tagged('post_install', '-at_install')
class TestMemoTokenizer(TransactionCase):
    """Behavioural checks for ``tokenize_memo``."""

    def test_extracts_rbc_etf_reference(self):
        """A "REF <n>" pair is fused and surrounding words are kept."""
        result = tokenize_memo("RBC ETF DEP REF 4831")
        for expected in ('RBC', 'ETF', 'REF4831'):
            self.assertIn(expected, result)

    def test_extracts_cheque_number(self):
        """A "CHEQUE <n>" pair is fused; the payee words survive."""
        result = tokenize_memo("CHEQUE 4827 - WESTIN PLATING")
        for expected in ('CHEQUE4827', 'WESTIN', 'PLATING'):
            self.assertIn(expected, result)

    def test_strips_noise_tokens(self):
        """Separators and single-character tokens never reach the output."""
        result = tokenize_memo("PAYMENT - INV - DEP - 12345")
        self.assertNotIn('-', result)
        too_short = [tok for tok in result if len(tok) <= 1]
        self.assertEqual(too_short, [])

    def test_handles_empty_memo(self):
        """Both the empty string and None produce an empty list."""
        for blank in ("", None):
            self.assertEqual(tokenize_memo(blank), [])

    def test_canadian_french_memo(self):
        """Unaccented French words are tokenized like any other word."""
        result = tokenize_memo("PAIEMENT VIREMENT BANCAIRE")
        for expected in ('PAIEMENT', 'VIREMENT'):
            self.assertIn(expected, result)

    def test_normalises_case(self):
        """Lowercase input is uppercased."""
        result = tokenize_memo("rbc etf dep ref 4831")
        self.assertIn('RBC', result)

    def test_handles_special_characters(self):
        """Punctuation separates tokens and '#' is accepted before a number."""
        result = tokenize_memo("RBC*PAYMENT/REF#4831")
        for expected in ('RBC', 'PAYMENT', 'REF4831'):
            self.assertIn(expected, result)
|
||||||
Reference in New Issue
Block a user