"""K-nearest precedent search.

Given a new bank line, find the most similar past reconciliations for
ranking + confidence scoring. Candidates are pre-filtered by partner and
by an amount window, then scored on amount closeness (weight 0.7) and memo
token overlap (weight 0.3). Recency only acts as a tie-break: candidates
are fetched most-recently-reconciled first and the final sort is stable.
"""

from dataclasses import dataclass


@dataclass
class PrecedentMatch:
    """One scored precedent returned by :func:`find_nearest_precedents`."""

    precedent_id: int  # id of the ``fusion.reconcile.precedent`` record
    amount: float  # amount stored on the precedent
    memo_tokens: str  # comma-separated tokens as stored ('' when unset)
    matched_move_line_count: int  # copied verbatim from the precedent
    similarity_score: float  # weighted score in [0.0, 1.0]; higher is closer


AMOUNT_TOLERANCE_PCT = 0.01  # 1% tolerance for "near" amount

_MIN_AMOUNT_TOLERANCE = 1.00  # absolute floor so tiny amounts keep a window
_AMOUNT_WEIGHT = 0.7
_MEMO_WEIGHT = 0.3
_NEUTRAL_MEMO_SCORE = 0.5  # used when the caller supplies no memo tokens
_CANDIDATE_OVERSAMPLE = 4  # fetch k * 4 rows so re-ranking has room to work


def find_nearest_precedents(env, *, partner_id, amount, k=5, memo_tokens=None):
    """Return up to ``k`` most-similar precedents for a partner + amount.

    Args:
        env: Environment-like mapping; ``env['fusion.reconcile.precedent']``
            must expose ``sudo()`` and ``search(domain, limit=, order=)``.
        partner_id: Partner the precedents must belong to.
        amount: Amount of the new bank line. May be negative; the tolerance
            window and the score are computed on its magnitude.
        k: Maximum number of matches to return. Values <= 0 yield ``[]``
            without querying.
        memo_tokens: Optional tokens of the new line, either an iterable of
            strings or a single comma-separated string. When omitted (or
            blank) every candidate gets a neutral memo score of 0.5.

    Returns:
        A list of :class:`PrecedentMatch`, best score first. Equal scores
        keep the search order (``reconciled_at desc``).
    """
    if k <= 0:
        return []

    Precedent = env['fusion.reconcile.precedent'].sudo()

    # Work on the magnitude so negative amounts get the same relative window
    # and the same score scale as positive ones.
    magnitude = abs(amount)
    tolerance = max(magnitude * AMOUNT_TOLERANCE_PCT, _MIN_AMOUNT_TOLERANCE)
    candidates = Precedent.search([
        ('partner_id', '=', partner_id),
        ('amount', '>=', amount - tolerance),
        ('amount', '<=', amount + tolerance),
    ], limit=k * _CANDIDATE_OVERSAMPLE, order='reconciled_at desc')

    query_tokens = _normalize_tokens(memo_tokens)
    scale = max(magnitude, 1)  # avoids division by ~0 for tiny amounts

    results = []
    for p in candidates:
        amount_score = 1.0 - min(abs(p.amount - amount) / scale, 1.0)
        if query_tokens:
            memo_score = _memo_overlap(p.memo_tokens, query_tokens)
        else:
            memo_score = _NEUTRAL_MEMO_SCORE
        similarity = (amount_score * _AMOUNT_WEIGHT) + (memo_score * _MEMO_WEIGHT)
        results.append(PrecedentMatch(
            precedent_id=p.id,
            amount=p.amount,
            memo_tokens=p.memo_tokens or '',
            matched_move_line_count=p.matched_move_line_count,
            similarity_score=similarity,
        ))

    # sorted() is stable (also with reverse=True), so ties keep recency order.
    ranked = sorted(results, key=lambda r: r.similarity_score, reverse=True)
    return ranked[:k]


def _normalize_tokens(tokens):
    """Return a set of stripped, non-blank tokens.

    Accepts a comma-separated string or any iterable of tokens; falsy input
    gives an empty set.
    """
    if not tokens:
        return set()
    if isinstance(tokens, str):
        # A bare string would otherwise be exploded into single characters.
        tokens = tokens.split(',')
    stripped = (str(token).strip() for token in tokens)
    return {token for token in stripped if token}


def _memo_overlap(precedent_tokens_str, new_tokens) -> float:
    """Jaccard similarity between two token sets.

    Args:
        precedent_tokens_str: Comma-separated tokens stored on the precedent
            (may be empty / falsy).
        new_tokens: Tokens of the new line, as an iterable or as a
            comma-separated string.

    Returns:
        ``|A & B| / |A | B|`` in [0.0, 1.0]; 0.0 when either side has no
        usable token.
    """
    precedent_set = _normalize_tokens(precedent_tokens_str)
    new_set = _normalize_tokens(new_tokens)
    if not precedent_set or not new_set:
        return 0.0
    return len(precedent_set & new_set) / len(precedent_set | new_set)
from datetime import date

from odoo.tests.common import TransactionCase, tagged

from odoo.addons.fusion_accounting_bank_rec.services.precedent_lookup import (
    find_nearest_precedents, PrecedentMatch,
)


@tagged('post_install', '-at_install')
class TestPrecedentLookup(TransactionCase):
    """Behavioural tests for ``find_nearest_precedents``.

    ``setUp`` seeds one fresh partner with three precedents: two at 1847.50
    and one at 1800.00, all carrying the memo tokens ``RBC,ETF,REF``.
    """

    def setUp(self):
        super().setUp()
        self.partner = self.env['res.partner'].create({'name': 'Precedent Lookup Partner'})
        self.currency = self.env.ref('base.CAD')
        self.company = self.env.company
        for amt in [1847.50, 1847.50, 1800.00]:
            self._create_precedent(amt, memo_tokens='RBC,ETF,REF')

    def _create_precedent(self, amount, memo_tokens=None):
        """Create one precedent for the test partner; memo tokens are optional."""
        vals = {
            'company_id': self.company.id,
            'partner_id': self.partner.id,
            'amount': amount,
            'currency_id': self.currency.id,
            'date': date.today(),
            'matched_move_line_count': 1,
            'source': 'manual',
        }
        if memo_tokens is not None:
            vals['memo_tokens'] = memo_tokens
        return self.env['fusion.reconcile.precedent'].create(vals)

    def test_finds_amount_exact_precedents(self):
        results = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=5)
        amounts = [r.amount for r in results]
        self.assertEqual(amounts.count(1847.50), 2)
        for match in results:
            self.assertIsInstance(match, PrecedentMatch)

    def test_returns_empty_for_unknown_partner(self):
        results = find_nearest_precedents(
            self.env, partner_id=999999, amount=1847.50, k=5)
        self.assertEqual(results, [])

    def test_respects_k_limit(self):
        for _ in range(10):
            self._create_precedent(1847.50)
        results = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=3)
        self.assertEqual(len(results), 3)

    def test_results_sorted_by_similarity_desc(self):
        results = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=5)
        # Fail loudly instead of passing vacuously when nothing comes back.
        self.assertGreaterEqual(len(results), 2)
        scores = [r.similarity_score for r in results]
        self.assertEqual(scores, sorted(scores, reverse=True))

    def test_memo_overlap_boosts_score(self):
        results_with_memo = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=5,
            memo_tokens=['RBC', 'ETF', 'REF'])
        results_no_memo = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=5)
        self.assertTrue(results_with_memo)
        self.assertTrue(results_no_memo)
        # Full token overlap (memo score 1.0) must beat the neutral 0.5 that
        # is applied when no memo tokens are supplied.
        self.assertGreater(results_with_memo[0].similarity_score,
                           results_no_memo[0].similarity_score)

    def test_amount_outside_tolerance_excluded(self):
        results = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=2000.00, k=5)
        self.assertEqual(results, [])