feat(fusion_accounting_bank_rec): precedent_lookup K-nearest search
Made-with: Cursor
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
from . import memo_tokenizer
|
from . import memo_tokenizer
|
||||||
from . import exchange_diff
|
from . import exchange_diff
|
||||||
from . import matching_strategies
|
from . import matching_strategies
|
||||||
|
from . import precedent_lookup
|
||||||
|
|||||||
62
fusion_accounting_bank_rec/services/precedent_lookup.py
Normal file
62
fusion_accounting_bank_rec/services/precedent_lookup.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""K-nearest precedent search.
|
||||||
|
|
||||||
|
Given a new bank line, find the most similar past reconciliations for
ranking + confidence scoring. Similarity score: amount delta (70%) and
memo token overlap (30%). Date recency selects which candidates are
scored (most recent first) and breaks ties between equal scores.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PrecedentMatch:
    """One scored precedent returned by the nearest-precedent search."""

    # Database id of the matched precedent record.
    precedent_id: int
    # Amount stored on the precedent record.
    amount: float
    # Comma-separated memo tokens of the precedent ('' when it has none).
    memo_tokens: str
    # Value of the precedent's matched_move_line_count field.
    matched_move_line_count: int
    # Combined amount/memo similarity; higher means more similar.
    similarity_score: float
|
||||||
|
|
||||||
|
|
||||||
|
# Fraction of the target amount used as the +/- window when pre-filtering
# candidate precedents (0.01 == 1%).
AMOUNT_TOLERANCE_PCT = 0.01 # 1% tolerance for "near" amount
|
||||||
|
|
||||||
|
|
||||||
|
def find_nearest_precedents(env, *, partner_id, amount, k=5, memo_tokens=None):
    """Return up to ``k`` most-similar precedents for a partner and amount.

    Candidates are fetched with one search restricted to the partner and to
    an amount window around ``amount``, most recently reconciled first and
    capped at ``k * 4`` records. Each candidate is then scored as
    ``0.7 * amount_score + 0.3 * memo_score``.

    Args:
        env: Odoo environment used to reach ``fusion.reconcile.precedent``.
        partner_id: id of the partner the precedents must belong to.
        amount: amount of the new bank line. May be negative; the window
            and the score are computed from its magnitude.
        k: maximum number of matches to return. A non-positive value
            yields an empty list.
        memo_tokens: optional iterable of memo tokens for the new line.
            When omitted or empty, every candidate gets a neutral memo
            score of 0.5.

    Returns:
        A list of ``PrecedentMatch`` sorted by descending
        ``similarity_score``. Equal scores keep the search order, i.e.
        most recently reconciled first.
    """
    if k <= 0:
        # Nothing can be returned; skip the query entirely.
        return []

    # NOTE(review): sudo() bypasses access rules and the domain below has no
    # company filter -- confirm that cross-company precedents are intended.
    Precedent = env['fusion.reconcile.precedent'].sudo()

    # Work from the magnitude so outgoing (negative) lines get the same
    # relative window and score scale as incoming ones.
    magnitude = abs(amount)
    tolerance = max(magnitude * AMOUNT_TOLERANCE_PCT, 1.00)
    candidates = Precedent.search(
        [
            ('partner_id', '=', partner_id),
            ('amount', '>=', amount - tolerance),
            ('amount', '<=', amount + tolerance),
        ],
        limit=k * 4,  # over-fetch so re-ranking has more than k to choose from
        order='reconciled_at desc',
    )

    scale = max(magnitude, 1)  # avoids dividing by zero / tiny amounts
    results = []
    for p in candidates:
        amount_score = 1.0 - min(abs(p.amount - amount) / scale, 1.0)
        if memo_tokens:
            memo_score = _memo_overlap(p.memo_tokens, memo_tokens)
        else:
            memo_score = 0.5  # no memo supplied: neither reward nor penalise
        similarity = (amount_score * 0.7) + (memo_score * 0.3)
        results.append(PrecedentMatch(
            precedent_id=p.id,
            amount=p.amount,
            memo_tokens=p.memo_tokens or '',
            matched_move_line_count=p.matched_move_line_count,
            similarity_score=similarity,
        ))

    # list.sort is stable (also with reverse=True), so ties stay in
    # reconciled_at-descending order.
    results.sort(key=lambda match: match.similarity_score, reverse=True)
    return results[:k]
|
||||||
|
|
||||||
|
|
||||||
|
def _memo_overlap(precedent_tokens_str, new_tokens) -> float:
|
||||||
|
"""Jaccard similarity between two token sets."""
|
||||||
|
if not precedent_tokens_str or not new_tokens:
|
||||||
|
return 0.0
|
||||||
|
precedent_set = set(precedent_tokens_str.split(','))
|
||||||
|
new_set = set(new_tokens) if not isinstance(new_tokens, set) else new_tokens
|
||||||
|
if not precedent_set and not new_set:
|
||||||
|
return 0.0
|
||||||
|
return len(precedent_set & new_set) / len(precedent_set | new_set)
|
||||||
@@ -2,3 +2,4 @@ from . import test_memo_tokenizer
|
|||||||
from . import test_exchange_diff
|
from . import test_exchange_diff
|
||||||
from . import test_matching_strategies
|
from . import test_matching_strategies
|
||||||
from . import test_ai_suggestion_lifecycle
|
from . import test_ai_suggestion_lifecycle
|
||||||
|
from . import test_precedent_lookup
|
||||||
|
|||||||
73
fusion_accounting_bank_rec/tests/test_precedent_lookup.py
Normal file
73
fusion_accounting_bank_rec/tests/test_precedent_lookup.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
from datetime import date
|
||||||
|
from odoo.tests.common import TransactionCase, tagged
|
||||||
|
from odoo.addons.fusion_accounting_bank_rec.services.precedent_lookup import (
|
||||||
|
find_nearest_precedents, PrecedentMatch,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@tagged('post_install', '-at_install')
class TestPrecedentLookup(TransactionCase):
    """Integration tests for ``find_nearest_precedents``."""

    def setUp(self):
        """Create a partner with three precedents: 1847.50 twice, 1800.00 once."""
        super().setUp()
        self.partner = self.env['res.partner'].create({'name': 'Precedent Lookup Partner'})
        self.currency = self.env.ref('base.CAD')
        self.company = self.env.company
        for amt in [1847.50, 1847.50, 1800.00]:
            self._create_precedent(amt, memo_tokens='RBC,ETF,REF')

    def _create_precedent(self, amount, **extra):
        """Create one precedent for the test partner; ``extra`` adds/overrides fields."""
        vals = {
            'company_id': self.company.id,
            'partner_id': self.partner.id,
            'amount': amount,
            'currency_id': self.currency.id,
            'date': date.today(),
            'matched_move_line_count': 1,
            'source': 'manual',
        }
        vals.update(extra)
        return self.env['fusion.reconcile.precedent'].create(vals)

    def test_finds_amount_exact_precedents(self):
        """Both exact-amount precedents are returned."""
        results = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=5)
        for match in results:
            self.assertIsInstance(match, PrecedentMatch)
        amounts = [r.amount for r in results]
        self.assertEqual(amounts.count(1847.50), 2)

    def test_returns_empty_for_unknown_partner(self):
        """A partner id with no precedents yields an empty list."""
        results = find_nearest_precedents(
            self.env, partner_id=999999, amount=1847.50, k=5)
        self.assertEqual(results, [])

    def test_respects_k_limit(self):
        """No more than k matches come back even when more candidates exist."""
        for _ in range(10):
            self._create_precedent(1847.50)
        results = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=3)
        self.assertEqual(len(results), 3)

    def test_results_sorted_by_similarity_desc(self):
        """Scores never increase from one result to the next."""
        results = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=5)
        # Fail loudly instead of passing vacuously on an empty result.
        self.assertGreaterEqual(len(results), 2)
        scores = [r.similarity_score for r in results]
        self.assertEqual(scores, sorted(scores, reverse=True))

    def test_memo_overlap_boosts_score(self):
        """Supplying matching memo tokens does not lower the top score."""
        results_with_memo = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=5,
            memo_tokens=['RBC', 'ETF', 'REF'])
        results_no_memo = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=1847.50, k=5)
        # Both lookups must find the fixtures, otherwise the comparison
        # below would be skipped silently.
        self.assertTrue(results_with_memo)
        self.assertTrue(results_no_memo)
        self.assertGreaterEqual(results_with_memo[0].similarity_score,
                                results_no_memo[0].similarity_score - 0.001)

    def test_amount_outside_tolerance_excluded(self):
        """Precedents outside the amount window are not returned."""
        results = find_nearest_precedents(
            self.env, partner_id=self.partner.id, amount=2000.00, k=5)
        self.assertEqual(results, [])
|
||||||
Reference in New Issue
Block a user