changes
This commit is contained in:
@@ -0,0 +1,7 @@
|
||||
from . import memo_tokenizer
|
||||
from . import exchange_diff
|
||||
from . import matching_strategies
|
||||
from . import precedent_lookup
|
||||
from . import pattern_extractor
|
||||
from . import confidence_scoring
|
||||
from . import precedent_backfill
|
||||
@@ -0,0 +1,178 @@
|
||||
"""4-pass confidence scoring pipeline.
|
||||
|
||||
Pass 1: SQL filter — partner match + reconcilable account (done by caller — engine._fetch_candidates)
|
||||
Pass 2: Statistical scoring — amount delta + pattern match + precedent similarity
|
||||
Pass 3: AI re-rank (if provider configured) — feed top 5 to LLM, parse JSON ranking
|
||||
Pass 4: Persist as fusion.reconcile.suggestion rows (done by caller — engine.suggest_matches)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .matching_strategies import Candidate
|
||||
from .precedent_lookup import find_nearest_precedents
|
||||
from .memo_tokenizer import tokenize_memo
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ScoredCandidate:
    """Scoring outcome for a single candidate journal item.

    Instances are built by ``score_candidates``. The AI re-rank pass may
    later overwrite ``confidence``, ``reasoning`` and ``score_ai_rerank``.
    """

    # Identifier copied from the scored ``Candidate``.
    candidate_id: int
    # Blended confidence used for ranking (higher is better).
    confidence: float
    # Short human-readable explanation of the score.
    reasoning: str
    # Statistical sub-scores that feed ``confidence``.
    score_amount_match: float
    score_partner_pattern: float
    score_precedent_similarity: float
    # Confidence reported by the AI re-rank; stays 0.0 when no re-rank ran.
    score_ai_rerank: float = 0.0
|
||||
|
||||
|
||||
def score_candidates(env, *, statement_line, candidates, k=5, use_ai=True):
    """Rank candidate journal items against one bank statement line.

    The statistical confidence is a weighted blend: 50% amount closeness,
    25% partner-pattern fit and 25% precedent similarity. When ``use_ai`` is
    set and a provider is configured, the top ``k`` results are handed to the
    AI re-rank; any exception raised there is logged and the statistical
    ranking is returned instead.

    Args:
        env: Odoo env
        statement_line: account.bank.statement.line recordset (singleton)
        candidates: list of Candidate dataclasses (from matching_strategies)
        k: max number of scored candidates to return
        use_ai: if True AND a provider is configured, invoke AI re-rank

    Returns:
        list of ScoredCandidate sorted by confidence desc, max length k.
        Empty list when there are no candidates or no statement line.
    """
    if not (candidates and statement_line):
        return []

    partner = statement_line.partner_id
    partner_id = partner.id if partner else None
    bank_amount = abs(statement_line.amount)
    memo_tokens = tokenize_memo(statement_line.payment_ref)

    # Partner-specific context: the learned pattern row (if any) and the
    # nearest historical reconciliations. Both stay empty without a partner.
    pattern = None
    precedents = []
    if partner_id:
        found = env['fusion.reconcile.pattern'].sudo().search(
            [('partner_id', '=', partner_id)], limit=1)
        pattern = found or None  # normalise an empty recordset to None
        precedents = find_nearest_precedents(
            env, partner_id=partner_id, amount=bank_amount, k=5, memo_tokens=memo_tokens)

    # Relative amount gap is measured against the bank amount, floored at 1
    # so tiny amounts do not blow up the ratio.
    denominator = max(bank_amount, 1)

    scored = []
    for cand in candidates:
        gap = abs(cand.amount - bank_amount)
        amount_score = 1.0 - min(gap / denominator, 1.0)
        pattern_score = _pattern_score(cand, pattern, bank_amount)
        precedent_score = _precedent_score(cand, precedents)
        blended = (amount_score * 0.5) + (pattern_score * 0.25) + (precedent_score * 0.25)

        scored.append(ScoredCandidate(
            candidate_id=cand.id,
            confidence=round(blended, 3),
            reasoning=_build_reasoning(amount_score, pattern_score, precedent_score, pattern),
            score_amount_match=round(amount_score, 3),
            score_partner_pattern=round(pattern_score, 3),
            score_precedent_similarity=round(precedent_score, 3),
        ))

    # Stable descending sort: ties keep their original candidate order.
    scored.sort(key=lambda item: item.confidence, reverse=True)
    top_k = scored[:k]

    provider = _get_provider(env, 'bank_rec_suggest') if use_ai else None
    if provider is None:
        return top_k

    try:
        return _ai_rerank(env, provider, statement_line, top_k, pattern, precedents)
    except Exception as exc:
        _logger.warning("AI re-rank failed, using statistical scoring: %s", exc)
        return top_k
|
||||
|
||||
|
||||
def _pattern_score(cand, pattern, bank_amount) -> float:
    """Score how well ``cand`` fits the partner's learned pattern.

    Returns 1.0 when the pattern's preferred strategy is ``'exact_amount'``
    and the candidate amount is within 0.005 of the bank amount. Returns the
    neutral 0.5 in every other case, including when no pattern exists.
    """
    neutral = 0.5
    if not pattern:
        return neutral

    prefers_exact = pattern.pref_strategy == 'exact_amount'
    if prefers_exact and abs(cand.amount - bank_amount) < 0.005:
        return 1.0
    return neutral
|
||||
|
||||
|
||||
def _precedent_score(cand, precedents) -> float:
    """Return the best ``similarity_score`` among ``precedents``.

    Falls back to the neutral 0.5 when there are no precedents. ``cand`` is
    part of the signature but is not consulted, so every candidate of a
    statement line receives the same precedent score.
    """
    if precedents:
        return max((match.similarity_score for match in precedents), default=0.5)
    return 0.5
|
||||
|
||||
|
||||
def _build_reasoning(amount_score, pattern_score, precedent_score, pattern) -> str:
    """Compose the human-readable explanation attached to a scored candidate.

    Notes are joined with " · " in a fixed order: amount, partner pattern,
    precedent. "Weak signal" is returned when no note applies.
    ``pattern_score`` is accepted but does not influence the text.
    """
    notes = []

    # Amount note: at most one of the two wordings.
    if amount_score >= 0.99:
        notes.append("Exact amount match")
    elif amount_score >= 0.95:
        notes.append("Amount close")

    # Only mention the pattern once it is backed by more than 5 reconciles.
    has_solid_pattern = bool(pattern) and pattern.reconcile_count > 5
    if has_solid_pattern:
        notes.append(f"Matches partner's {pattern.reconcile_count}-reconcile pattern")

    if precedent_score >= 0.8:
        notes.append("Strong precedent match")

    return " · ".join(notes) or "Weak signal"
|
||||
|
||||
|
||||
def _get_provider(env, feature_name):
    """Look up provider name from per-feature config; instantiate adapter.

    The name is read from the ``fusion_accounting.provider.<feature_name>``
    system parameter, falling back to ``fusion_accounting.provider.default``.
    Names starting with ``openai`` or ``claude`` select the matching adapter.

    Only the adapter that is actually selected gets imported, so a missing or
    broken module for the other provider cannot disable the configured one.

    Args:
        env: Odoo env.
        feature_name: feature key used to build the per-feature parameter name.

    Returns:
        An adapter instance, or None if no provider is configured
        (statistical-only mode), the name is not recognised, or the adapter
        module cannot be imported.
    """
    params = env['ir.config_parameter'].sudo()
    provider_name = (
        params.get_param(f'fusion_accounting.provider.{feature_name}')
        or params.get_param('fusion_accounting.provider.default')
    )
    if not provider_name:
        return None

    # Keep only the import inside the try: an ImportError raised by the
    # adapter constructor itself must not be mistaken for a missing module.
    try:
        if provider_name.startswith('openai'):
            from odoo.addons.fusion_accounting_ai.services.adapters.openai_adapter import (
                OpenAIAdapter as adapter_cls,
            )
        elif provider_name.startswith('claude'):
            from odoo.addons.fusion_accounting_ai.services.adapters.claude import (
                ClaudeAdapter as adapter_cls,
            )
        else:
            _logger.warning(
                "Unknown AI provider %r configured for %s; using statistical scoring",
                provider_name, feature_name)
            return None
    except ImportError:
        _logger.warning("fusion_accounting_ai adapters not importable")
        return None

    return adapter_cls(env)
|
||||
|
||||
|
||||
def _ai_rerank(env, provider, statement_line, scored, pattern, precedents):
    """Send top-K candidates + features to LLM for re-rank. Parse JSON response.

    Returns ``scored`` unchanged when the prompt module is not installed, the
    response is not valid JSON, or the JSON does not have the expected
    ``{"ranked": [{"candidate_id": ..., "confidence": ..., "reason": ...}]}``
    shape. Individual malformed entries are ignored.

    Exceptions raised by ``build_prompt`` or ``provider.complete`` (e.g.
    network errors) are not caught here; ``score_candidates`` catches them
    and falls back to the statistical ranking.

    Args:
        env: Odoo env (unused here, kept for signature compatibility).
        provider: adapter exposing ``complete(system=, messages=, max_tokens=,
            temperature=)`` and returning a mapping with a ``'content'`` key.
        statement_line: bank statement line being reconciled.
        scored: list of ScoredCandidate; mutated in place on success.
        pattern: partner pattern record or None.
        precedents: list of precedent matches.

    Returns:
        The same ``scored`` list, re-sorted by blended confidence when the
        AI ranking could be applied.
    """
    try:
        from odoo.addons.fusion_accounting_ai.services.prompts.bank_rec_prompt import build_prompt
    except ImportError:
        _logger.debug("bank_rec_prompt not yet available; skipping AI re-rank")
        return scored

    system, user = build_prompt(statement_line, scored, pattern, precedents)
    response = provider.complete(
        system=system,
        messages=[{'role': 'user', 'content': user}],
        max_tokens=800,
        temperature=0.0,
    )

    try:
        parsed = json.loads(response['content'])
    except (json.JSONDecodeError, KeyError, TypeError):
        return scored

    return _merge_ai_ranking(scored, parsed)


def _merge_ai_ranking(scored, parsed):
    """Blend a parsed LLM ranking into ``scored`` (40% statistical, 60% AI).

    Defensive against model output: anything that is not a dict with a
    ``ranked`` list leaves ``scored`` untouched, and entries without a usable
    integer ``candidate_id`` are skipped.
    """
    ranked = parsed.get('ranked') if isinstance(parsed, dict) else None
    if not isinstance(ranked, list):
        return scored

    ai_by_id = {}
    for item in ranked:
        if not isinstance(item, dict):
            continue
        try:
            # LLMs frequently echo ids back as strings ("12"); normalise.
            cand_id = int(item['candidate_id'])
        except (KeyError, TypeError, ValueError):
            continue
        ai_by_id[cand_id] = item
    if not ai_by_id:
        return scored

    for entry in scored:
        item = ai_by_id.get(entry.candidate_id)
        if item is None:
            continue

        # Validate everything before mutating so an entry is never left
        # half-updated.
        try:
            ai_conf = float(item.get('confidence', entry.confidence))
        except (TypeError, ValueError):
            ai_conf = entry.confidence
        if ai_conf != ai_conf:  # NaN never equals itself
            ai_conf = entry.confidence
        ai_conf = min(max(ai_conf, 0.0), 1.0)  # confidences live in [0, 1]

        reason = item.get('reason')
        if isinstance(reason, str) and reason.strip():
            entry.reasoning = reason
        entry.score_ai_rerank = ai_conf
        entry.confidence = round((entry.confidence * 0.4) + (ai_conf * 0.6), 3)

    scored.sort(key=lambda x: -x.confidence)
    return scored
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Exchange-difference calculation helper.
|
||||
|
||||
Pure-Python FX gain/loss computation. The engine uses this for rapid
|
||||
pre-checks; Odoo's account.move._create_exchange_difference_move() is
|
||||
invoked separately for the actual GL posting.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class ExchangeDiffResult:
    """Outcome of an exchange-difference pre-check.

    Amounts are the values produced by ``compute_exchange_diff``, which
    rounds them to 2 decimals.
    """

    # True when the absolute difference exceeds the rounding tolerance.
    needs_diff_move: bool
    # In company currency; positive = gain, negative = loss.
    diff_amount: float
    # Bank line amount after applying its rate.
    line_company_amount: float
    # Matched journal item amount after applying its rate.
    against_company_amount: float
|
||||
|
||||
|
||||
def compute_exchange_diff(*, line_amount, line_currency_code, against_amount,
                          against_currency_code, line_rate, against_rate) -> ExchangeDiffResult:
    """Decide whether an exchange-difference move is needed, and how large.

    Each amount is multiplied by its own rate and the two products are
    subtracted. The need for a diff move is decided on the unrounded
    difference; the reported amounts are rounded to 2 decimals.

    NOTE(review): because the amounts are *multiplied* by the rates, the
    rates must be expressed as company-currency units per one unit of the
    amount's currency. The previous docstring described ``line_rate`` as
    "foreign per company currency", which would call for a division;
    confirm the convention with the callers.

    Args:
        line_amount: Bank line amount in its currency
        line_currency_code: e.g. 'USD' (informational; not used in the math)
        against_amount: Matched journal item amount in its currency
        against_currency_code: e.g. 'USD' (or different; informational only)
        line_rate: FX rate applied to ``line_amount`` (rate at line date)
        against_rate: FX rate applied to ``against_amount`` (rate at journal
            item posting date)

    Returns:
        ExchangeDiffResult with needs_diff_move flag and computed diff
        in company currency (positive = gain, negative = loss).
    """
    converted_line = line_amount * line_rate
    converted_against = against_amount * against_rate
    delta = converted_line - converted_against

    return ExchangeDiffResult(
        # Anything at or below half a cent is treated as rounding noise.
        needs_diff_move=abs(delta) > 0.005,
        diff_amount=round(delta, 2),
        line_company_amount=round(converted_line, 2),
        against_company_amount=round(converted_against, 2),
    )
|
||||
@@ -0,0 +1,91 @@
|
||||
"""Matching strategy classes for the reconcile engine.
|
||||
|
||||
Each strategy takes a bank amount + list of candidate journal items
|
||||
and returns a MatchResult with the picked ids + confidence + residual.
|
||||
Strategies are pure Python; no ORM dependency.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from itertools import combinations
|
||||
|
||||
|
||||
@dataclass
class Candidate:
    """Plain-data view of one journal item offered to a matching strategy."""

    # Identifier reported back in ``MatchResult.picked_ids``.
    id: int
    # Amount compared against the bank amount.
    amount: float
    # Partner the journal item belongs to.
    partner_id: int
    # Age in days; larger means older (drives the FIFO ordering).
    age_days: int
|
||||
|
||||
|
||||
@dataclass
class MatchResult:
    """What a matching strategy decided for one bank amount.

    The defaults describe "no match": nothing picked and zero confidence.
    """

    # Ids of the chosen candidates; each instance gets its own list.
    picked_ids: list[int] = field(default_factory=list)
    # Strategy-assigned confidence.
    confidence: float = 0.0
    # bank_amount - sum(picked); positive = under-allocated
    residual: float = 0.0
    # Name of the strategy that produced this result.
    strategy_name: str = ""
|
||||
|
||||
|
||||
# Absolute tolerance that the strategy classes in this file compare amount
# differences and remaining balances against.
AMOUNT_TOLERANCE = 0.005 # currency rounding tolerance
|
||||
|
||||
|
||||
class AmountExactStrategy:
    """Single-candidate match on an (almost) identical amount.

    A candidate qualifies when its amount differs from the bank amount by
    less than AMOUNT_TOLERANCE. Among several qualifying candidates the one
    with the largest ``age_days`` wins (FIFO tiebreaker); on equal age the
    first one in the input order is kept.
    """

    def match(self, *, bank_amount: float, candidates: list[Candidate]) -> MatchResult:
        """Return the oldest exact-amount candidate, or an empty result."""
        oldest = None
        for cand in candidates:
            if not (abs(cand.amount - bank_amount) < AMOUNT_TOLERANCE):
                continue
            # Strict ">" keeps the first candidate when ages tie.
            if oldest is None or cand.age_days > oldest.age_days:
                oldest = cand

        if oldest is None:
            return MatchResult(strategy_name='amount_exact')

        return MatchResult(
            picked_ids=[oldest.id],
            confidence=1.0,
            residual=0.0,
            strategy_name='amount_exact',
        )
|
||||
|
||||
|
||||
class FIFOStrategy:
    """Pick oldest candidates first until the bank amount is exhausted.

    May produce partial reconcile residual if last candidate doesn't fit
    exactly: the last picked candidate can overshoot, in which case the
    residual is negative.
    """

    def match(self, *, bank_amount: float, candidates: list[Candidate]) -> MatchResult:
        """Allocate ``bank_amount`` over ``candidates`` from oldest to newest.

        Returns:
            MatchResult with ``strategy_name='fifo'``. Confidence is 0.7 when
            the bank amount was fully allocated (remaining balance within
            AMOUNT_TOLERANCE, or overshot) and 0.5 when the candidates ran
            out first. When nothing could be picked, which happens with no
            candidates or a bank amount already at or below the tolerance,
            the confidence stays 0.0 so an empty pick is never reported as
            a confident match.
        """
        if not candidates:
            return MatchResult(strategy_name='fifo')

        picked = []
        remaining = bank_amount
        # Stable sort: candidates of equal age keep their input order.
        for cand in sorted(candidates, key=lambda c: c.age_days, reverse=True):
            if remaining <= AMOUNT_TOLERANCE:
                break
            picked.append(cand.id)
            remaining -= cand.amount

        if not picked:
            # Nothing to allocate; report the untouched balance.
            return MatchResult(residual=remaining, strategy_name='fifo')

        # Same boundary as the loop's stop condition above.
        fully_allocated = remaining <= AMOUNT_TOLERANCE
        return MatchResult(
            picked_ids=picked,
            confidence=0.7 if fully_allocated else 0.5,
            residual=remaining,
            strategy_name='fifo',
        )
|
||||
|
||||
|
||||
class MultiInvoiceStrategy:
    """Find the smallest group of candidates whose amounts add up to the bank amount.

    Groups of 2 candidates are tried first, then 3, and so on up to
    ``max_combinations`` candidates per group. The limit caps the group
    *size*, not the number of groups examined.
    """

    def __init__(self, max_combinations=3):
        # Largest number of candidates allowed in one group.
        self.max_combinations = max_combinations

    def match(self, *, bank_amount: float, candidates: list[Candidate]) -> MatchResult:
        """Return the first group summing to ``bank_amount`` within AMOUNT_TOLERANCE."""
        hit = next(
            (
                group
                for size in range(2, self.max_combinations + 1)
                for group in combinations(candidates, size)
                if abs(sum(c.amount for c in group) - bank_amount) < AMOUNT_TOLERANCE
            ),
            None,
        )
        if hit is None:
            return MatchResult(strategy_name='multi_invoice')

        return MatchResult(
            picked_ids=[c.id for c in hit],
            confidence=0.85,
            residual=0.0,
            strategy_name=f'multi_invoice_{len(hit)}',
        )
|
||||
@@ -0,0 +1,44 @@
|
||||
"""Extract searchable tokens from Canadian bank statement memos.
|
||||
|
||||
Handles common memo formats from RBC, TD, Scotia, BMO, plus generic
|
||||
cheque-number and reference-number patterns. Output is normalized
|
||||
(uppercase, alphanumeric) for case-insensitive matching.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
# (compiled pattern, replacement) pairs applied in order by tokenize_memo.
# Each one collapses a keyword, an optional "#" and a number into a single
# canonical token, e.g. "CHQ # 45" -> "CHEQUE45".
REF_PATTERNS = [
    # REF / REFERENCE -> REF<number>
    (re.compile(r'\b(REF|REFERENCE)\s*#?\s*(\d+)\b', re.IGNORECASE), r'REF\2'),
    # CHQ / CHEQUE / CHECK -> CHEQUE<number>
    (re.compile(r'\b(CHQ|CHEQUE|CHECK)\s*#?\s*(\d+)\b', re.IGNORECASE), r'CHEQUE\2'),
    # INV / INVOICE -> INV<number>
    (re.compile(r'\b(INV|INVOICE)\s*#?\s*(\d+)\b', re.IGNORECASE), r'INV\2'),
]

# Tokens shorter than this are dropped by tokenize_memo.
MIN_TOKEN_LENGTH = 2
|
||||
|
||||
|
||||
def tokenize_memo(memo: str | None) -> list[str]:
|
||||
"""Return list of normalized tokens from a bank memo.
|
||||
|
||||
Empty/None input returns []. Order preserved (first occurrence wins
|
||||
for de-duplication)."""
|
||||
if not memo:
|
||||
return []
|
||||
|
||||
text = memo.upper()
|
||||
for pattern, replacement in REF_PATTERNS:
|
||||
text = pattern.sub(replacement, text)
|
||||
|
||||
text = re.sub(r'[^A-Z0-9]+', ' ', text)
|
||||
raw_tokens = text.split()
|
||||
|
||||
seen = set()
|
||||
tokens = []
|
||||
for tok in raw_tokens:
|
||||
if len(tok) < MIN_TOKEN_LENGTH:
|
||||
continue
|
||||
if tok in seen:
|
||||
continue
|
||||
seen.add(tok)
|
||||
tokens.append(tok)
|
||||
|
||||
return tokens
|
||||
@@ -0,0 +1,74 @@
|
||||
"""Aggregate per-partner reconciliation patterns from precedent rows.
|
||||
|
||||
Computes typical amount range, cadence, preferred strategy, common memo
|
||||
tokens. Output is a dict suitable for create/write on fusion.reconcile.pattern.
|
||||
"""
|
||||
|
||||
from collections import Counter
|
||||
from statistics import median
|
||||
|
||||
|
||||
def extract_pattern_for_partner(env, *, company_id, partner_id) -> dict:
    """Aggregate one (company, partner) pair's precedents into pattern values.

    Looks at the 200 most recently reconciled precedents and derives the
    amount range, average gap between reconciliations, preferred matching
    strategy and frequently seen memo tokens.

    Returns vals dict suitable for env['fusion.reconcile.pattern'].create().
    Without any precedent the dict only carries the two ids and a
    ``reconcile_count`` of 0.
    """
    history = env['fusion.reconcile.precedent'].sudo().search(
        [
            ('company_id', '=', company_id),
            ('partner_id', '=', partner_id),
        ],
        order='reconciled_at desc',
        limit=200,
    )

    vals = {'company_id': company_id, 'partner_id': partner_id}
    if not history:
        vals['reconcile_count'] = 0
        return vals

    amounts = sorted(history.mapped('amount'))
    line_counts = history.mapped('matched_move_line_count')

    # Preferred strategy: multi-invoice when most reconciles touched several
    # lines, otherwise exact-amount if the amounts cluster, else FIFO.
    singles = sum(1 for n in line_counts if n == 1)
    multis = sum(1 for n in line_counts if n > 1)
    if multis > singles:
        strategy = 'multi_invoice'
    elif _amounts_concentrated(amounts):
        strategy = 'exact_amount'
    else:
        strategy = 'fifo'

    # Cadence: mean number of days between consecutive reconciliations.
    stamps = sorted(p.reconciled_at for p in history if p.reconciled_at)
    gaps = [(later - earlier).days for earlier, later in zip(stamps, stamps[1:])]
    cadence = sum(gaps) / len(gaps) if gaps else 0.0

    token_counter = Counter(
        tok.strip()
        for p in history
        if p.memo_tokens
        for tok in p.memo_tokens.split(',')
    )
    # Keep tokens appearing in >=30% of precedents (min floor of 2 occurrences)
    threshold = max(2, len(history) * 0.3)
    frequent = [tok for tok, seen in token_counter.most_common() if seen >= threshold]

    vals.update({
        'reconcile_count': len(history),
        'typical_amount_range': f"${min(amounts):,.2f} – ${max(amounts):,.2f} (median ${median(amounts):,.2f})",
        'typical_cadence_days': round(cadence, 1),
        'pref_strategy': strategy,
        'common_memo_tokens': ','.join(frequent),
    })
    return vals
|
||||
|
||||
|
||||
def _amounts_concentrated(amounts: list[float]) -> bool:
    """Tell whether the amounts cluster tightly around their median.

    Fewer than 3 amounts always count as concentrated. Otherwise at least
    60% of the amounts must lie within 5% of the median (the relative gap is
    measured against the median, floored at 1).
    """
    total = len(amounts)
    if total < 3:
        return True

    centre = median(amounts)
    scale = max(centre, 1)
    near_centre = [a for a in amounts if abs(a - centre) / scale < 0.05]
    return len(near_centre) / total >= 0.6
|
||||
@@ -0,0 +1,116 @@
|
||||
"""Pure-Python helpers for backfilling fusion.reconcile.precedent
|
||||
from existing account.partial.reconcile rows during migration.
|
||||
|
||||
Strategy:
|
||||
- Each account.partial.reconcile that involves at least one
|
||||
account.bank.statement.line's reconcile-account line is a candidate.
|
||||
- One precedent per qualifying partial. The (statement_line.id, account_id,
|
||||
amount) triple is encoded into matched_account_ids so a second run can
|
||||
detect and skip already-backfilled rows (idempotency).
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .memo_tokenizer import tokenize_memo
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _identify_bank_side(partial):
    """Work out which side of a partial reconcile belongs to a bank statement line.

    The debit side is checked first, then the credit side.

    Returns:
        (bank_move_line, counterpart_move_line, statement_line_id), or
        (None, None, None) if neither side is a bank statement line.
    """
    debit, credit = partial.debit_move_id, partial.credit_move_id

    for bank_side, other_side in ((debit, credit), (credit, debit)):
        st_line = bank_side.move_id.statement_line_id
        if st_line:
            return bank_side, other_side, st_line.id
    return None, None, None
|
||||
|
||||
|
||||
def backfill_precedents(env, *, company_id=None, batch_size=500, limit=10000):
    """Walk account.partial.reconcile and create fusion.reconcile.precedent
    rows for any reconcile that involves a bank statement line.

    Idempotent: a partial is skipped when a ``source='backfill'`` precedent
    already exists with the same company, partner, amount, date, journal and
    counterpart account (stored in ``matched_account_ids``). The account is
    matched exactly, so account 12 is no longer confused with 112 or 120.
    Date and journal are part of the signature so that recurring payments of
    the same amount each get their own precedent. Rows written by an earlier
    run carry the same values and are still recognised.

    Each partial is processed inside a savepoint. A failing create is rolled
    back, logged and counted as skipped without poisoning the surrounding
    transaction.

    Args:
        env: Odoo env.
        company_id: optional company to restrict the backfill to.
        batch_size: commit (outside test cursors) and log progress every
            this many created rows; values below 1 are treated as 1.
        limit: maximum number of partials to examine.

    Returns dict with `created` and `skipped` counts.
    """
    Precedent = env['fusion.reconcile.precedent'].sudo()
    Partial = env['account.partial.reconcile'].sudo()
    Line = env['account.bank.statement.line'].sudo()

    in_test_mode = env.cr.__class__.__name__ == 'TestCursor'
    # A zero batch size would make the modulo below raise ZeroDivisionError.
    commit_every = max(int(batch_size or 0), 1)

    # Pre-filter to partials that touch a bank statement line on either side.
    # In a real DB we typically have 10x more invoice<->payment partials than
    # bank-rec partials; filtering here keeps the loop bounded and makes the
    # default limit reflect "real" candidates rather than every partial ever.
    domain = [
        '|',
        ('debit_move_id.move_id.statement_line_id', '!=', False),
        ('credit_move_id.move_id.statement_line_id', '!=', False),
    ]
    if company_id:
        domain.append(('company_id', '=', company_id))
    partials = Partial.search(domain, limit=limit, order='id asc')

    created = 0
    skipped = 0
    for partial in partials:
        _bank_line, counterpart, bsl_id = _identify_bank_side(partial)
        if not bsl_id:
            skipped += 1
            continue

        try:
            # The savepoint confines a database error to this partial.
            with env.cr.savepoint():
                statement_line = Line.browse(bsl_id)
                partner_id = (counterpart.partner_id.id
                              if counterpart.partner_id else False)
                amount = abs(partial.amount)
                signature_account = str(counterpart.account_id.id)
                precedent_date = statement_line.date or partial.create_date.date()
                journal_id = statement_line.journal_id.id
                currency = (partial.debit_currency_id
                            or partial.company_id.currency_id)

                already_there = Precedent.search([
                    ('company_id', '=', partial.company_id.id),
                    ('partner_id', '=', partner_id),
                    ('amount', '=', amount),
                    ('date', '=', precedent_date),
                    ('journal_id', '=', journal_id),
                    ('matched_account_ids', '=', signature_account),
                    ('source', '=', 'backfill'),
                ], limit=1)

                if not already_there:
                    Precedent.create({
                        'company_id': partial.company_id.id,
                        'partner_id': partner_id,
                        'amount': amount,
                        'currency_id': currency.id,
                        'date': precedent_date,
                        'memo_tokens': ','.join(
                            tokenize_memo(statement_line.payment_ref or '')),
                        'journal_id': journal_id,
                        'matched_move_line_count': 1,
                        'matched_account_ids': signature_account,
                        'reconciler_user_id': partial.create_uid.id,
                        'reconciled_at': partial.create_date,
                        'source': 'backfill',
                    })
        except Exception as e:  # noqa: BLE001
            _logger.warning("Backfill skip partial %s: %s", partial.id, e)
            skipped += 1
            continue

        if already_there:
            skipped += 1
            continue

        created += 1
        if created % commit_every == 0:
            if not in_test_mode:
                env.cr.commit()
            _logger.info(
                "Backfill progress: %d created, %d skipped",
                created, skipped)

    _logger.info(
        "precedent_backfill complete: %d created, %d skipped",
        created, skipped)
    return {'created': created, 'skipped': skipped}
|
||||
@@ -0,0 +1,62 @@
|
||||
"""K-nearest precedent search.
|
||||
|
||||
Given a new bank line, find the most similar past reconciliations for
|
||||
ranking + confidence scoring. Distance metric: amount delta (primary),
|
||||
date recency (secondary), memo token overlap (tertiary).
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class PrecedentMatch:
    """One historical reconciliation returned by the nearest-precedent search."""

    # Id of the fusion.reconcile.precedent row.
    precedent_id: int
    # Amount of the past reconciliation.
    amount: float
    # Comma-separated memo tokens ('' when the precedent has none).
    memo_tokens: str
    # Number of journal items matched in the past reconciliation.
    matched_move_line_count: int
    # Blend of amount closeness and memo overlap; higher is more similar.
    similarity_score: float
|
||||
|
||||
|
||||
# Relative width of the amount window used by find_nearest_precedents: it
# searches amounts within max(amount * AMOUNT_TOLERANCE_PCT, 1.00) of the
# requested amount.
AMOUNT_TOLERANCE_PCT = 0.01 # 1% tolerance for "near" amount
|
||||
|
||||
|
||||
def find_nearest_precedents(env, *, partner_id, amount, k=5, memo_tokens=None):
    """Return up to k most-similar precedents for a partner+amount.

    The search is restricted to the partner's precedents whose amount lies
    within ``max(amount * AMOUNT_TOLERANCE_PCT, 1.00)`` of ``amount``. It
    takes the ``k * 4`` most recently reconciled ones and ranks them by
    similarity: 70% amount closeness, 30% memo token overlap. Without
    ``memo_tokens`` the memo part is a neutral 0.5.

    Args:
        env: Odoo env.
        partner_id: partner whose precedents are searched.
        amount: amount of the new bank line.
        k: maximum number of matches to return.
        memo_tokens: optional tokens of the new bank line's memo.

    Returns:
        list of PrecedentMatch sorted by similarity_score desc, max length k.
    """
    window = max(amount * AMOUNT_TOLERANCE_PCT, 1.00)
    domain = [
        ('partner_id', '=', partner_id),
        ('amount', '>=', amount - window),
        ('amount', '<=', amount + window),
    ]
    rows = env['fusion.reconcile.precedent'].sudo().search(
        domain, limit=k * 4, order='reconciled_at desc')

    scale = max(amount, 1)
    matches = []
    for row in rows:
        amount_score = 1.0 - min(abs(row.amount - amount) / scale, 1.0)
        if memo_tokens:
            memo_score = _memo_overlap(row.memo_tokens, memo_tokens)
        else:
            memo_score = 0.5
        matches.append(PrecedentMatch(
            precedent_id=row.id,
            amount=row.amount,
            memo_tokens=row.memo_tokens or '',
            matched_move_line_count=row.matched_move_line_count,
            similarity_score=(amount_score * 0.7) + (memo_score * 0.3),
        ))

    # Stable descending sort: equally similar precedents keep recency order.
    ranked = sorted(matches, key=lambda m: m.similarity_score, reverse=True)
    return ranked[:k]
|
||||
|
||||
|
||||
def _memo_overlap(precedent_tokens_str, new_tokens) -> float:
    """Jaccard similarity between two token sets.

    Stored tokens are split on commas and stripped of surrounding
    whitespace, and empty tokens are ignored on both sides. This keeps
    "A, B" and "A,,B" comparable with the token list ["A", "B"].

    Args:
        precedent_tokens_str: comma-separated tokens stored on a precedent.
        new_tokens: iterable of tokens for the new bank line.

    Returns:
        |intersection| / |union| in [0.0, 1.0]; 0.0 when either side is
        missing or no usable token remains.
    """
    if not precedent_tokens_str or not new_tokens:
        return 0.0

    stored = {tok.strip() for tok in precedent_tokens_str.split(',')}
    stored.discard('')
    # Copy so the caller's set is never mutated.
    incoming = set(new_tokens)
    incoming.discard('')

    union = stored | incoming
    if not union:
        return 0.0
    return len(stored & incoming) / len(union)
|
||||
Reference in New Issue
Block a user