This commit is contained in:
gsinghpal
2026-05-16 13:18:52 -04:00
parent 191a9c82be
commit 9ebf89bde2
1080 changed files with 0 additions and 1197 deletions

View File

@@ -0,0 +1,7 @@
from . import memo_tokenizer
from . import exchange_diff
from . import matching_strategies
from . import precedent_lookup
from . import pattern_extractor
from . import confidence_scoring
from . import precedent_backfill

View File

@@ -0,0 +1,178 @@
"""4-pass confidence scoring pipeline.
Pass 1: SQL filter — partner match + reconcilable account (done by caller — engine._fetch_candidates)
Pass 2: Statistical scoring — amount delta + pattern match + precedent similarity
Pass 3: AI re-rank (if provider configured) — feed top 5 to LLM, parse JSON ranking
Pass 4: Persist as fusion.reconcile.suggestion rows (done by caller — engine.suggest_matches)
"""
import json
import logging
from dataclasses import dataclass
from .matching_strategies import Candidate
from .precedent_lookup import find_nearest_precedents
from .memo_tokenizer import tokenize_memo
_logger = logging.getLogger(__name__)
@dataclass
class ScoredCandidate:
    """A candidate journal item together with its confidence breakdown.

    ``confidence`` is the blended value used for ranking; the ``score_*``
    fields keep the individual signals that were combined to produce it.
    """

    # Id copied from the Candidate that was scored.
    candidate_id: int
    # Blended ranking score.
    confidence: float
    # Human-readable explanation of the score.
    reasoning: str
    # Amount-delta signal.
    score_amount_match: float
    # Partner-pattern signal.
    score_partner_pattern: float
    # Precedent-similarity signal.
    score_precedent_similarity: float
    # Stays 0.0 unless the AI re-rank pass assigns a value.
    score_ai_rerank: float = 0.0
def score_candidates(env, *, statement_line, candidates, k=5, use_ai=True):
    """Rank candidate journal items for one bank statement line.

    Each candidate gets a weighted blend of three signals: amount closeness
    (50%), fit with the partner's stored pattern (25%) and similarity to
    past precedents (25%). The best ``k`` are optionally handed to the AI
    re-rank pass.

    Args:
        env: Odoo env
        statement_line: account.bank.statement.line recordset (singleton)
        candidates: list of Candidate dataclasses (from matching_strategies)
        k: max number of scored candidates to return
        use_ai: if True AND a provider is configured, invoke AI re-rank

    Returns:
        list of ScoredCandidate sorted by confidence desc, max length k.
        Empty when there are no candidates or no statement line.
    """
    if not (candidates and statement_line):
        return []

    partner = statement_line.partner_id
    partner_id = partner.id if partner else None
    bank_amount = abs(statement_line.amount)
    memo_tokens = tokenize_memo(statement_line.payment_ref)

    # Pattern and precedents are partner-specific; without a partner both
    # signals fall back to their neutral values inside the helpers.
    pattern = None
    precedents = []
    if partner_id:
        found = env['fusion.reconcile.pattern'].sudo().search(
            [('partner_id', '=', partner_id)], limit=1)
        pattern = found or None
        precedents = find_nearest_precedents(
            env, partner_id=partner_id, amount=bank_amount, k=5, memo_tokens=memo_tokens)

    scored = []
    for cand in candidates:
        # Relative amount gap, capped at 100% so the score stays in [0, 1].
        relative_gap = abs(cand.amount - bank_amount) / max(bank_amount, 1)
        amount_score = 1.0 - min(relative_gap, 1.0)
        pattern_score = _pattern_score(cand, pattern, bank_amount)
        precedent_score = _precedent_score(cand, precedents)
        confidence = (amount_score * 0.5) + (pattern_score * 0.25) + (precedent_score * 0.25)
        scored.append(ScoredCandidate(
            candidate_id=cand.id,
            confidence=round(confidence, 3),
            reasoning=_build_reasoning(amount_score, pattern_score, precedent_score, pattern),
            score_amount_match=round(amount_score, 3),
            score_partner_pattern=round(pattern_score, 3),
            score_precedent_similarity=round(precedent_score, 3),
        ))

    top_k = sorted(scored, key=lambda s: s.confidence, reverse=True)[:k]
    if not use_ai:
        return top_k

    provider = _get_provider(env, 'bank_rec_suggest')
    if provider is None:
        return top_k
    try:
        return _ai_rerank(env, provider, statement_line, top_k, pattern, precedents)
    except Exception as e:
        # The AI pass is optional: fall back to the statistical ranking.
        _logger.warning("AI re-rank failed, using statistical scoring: %s", e)
        return top_k
def _pattern_score(cand, pattern, bank_amount) -> float:
    """Score how well a candidate fits the partner's stored pattern.

    Returns 1.0 when the pattern prefers exact-amount matching and the
    candidate amount equals the bank amount (within half a cent); returns
    the neutral 0.5 otherwise, including when no pattern exists.
    """
    neutral = 0.5
    if not pattern:
        return neutral
    prefers_exact = pattern.pref_strategy == 'exact_amount'
    if prefers_exact and abs(cand.amount - bank_amount) < 0.005:
        return 1.0
    return neutral
def _precedent_score(cand, precedents) -> float:
    """Return the best similarity among the precedents, or 0.5 if there are none.

    ``cand`` is not consulted: every candidate of a statement line receives
    the same precedent score.
    """
    if not precedents:
        return 0.5
    similarities = [p.similarity_score for p in precedents]
    return max(similarities, default=0.5)
def _build_reasoning(amount_score, pattern_score, precedent_score, pattern) -> str:
    """Build the short human-readable explanation stored with a suggestion.

    Up to three notes (amount, partner pattern, precedent) are joined with
    " · "; "Weak signal" is returned when none applies. ``pattern_score``
    is accepted but does not influence the text.
    """
    if amount_score >= 0.99:
        amount_note = "Exact amount match"
    elif amount_score >= 0.95:
        amount_note = "Amount close"
    else:
        amount_note = None

    pattern_note = None
    if pattern and pattern.reconcile_count > 5:
        pattern_note = f"Matches partner's {pattern.reconcile_count}-reconcile pattern"

    precedent_note = "Strong precedent match" if precedent_score >= 0.8 else None

    notes = [n for n in (amount_note, pattern_note, precedent_note) if n is not None]
    return " · ".join(notes) or "Weak signal"
def _get_provider(env, feature_name):
    """Instantiate the AI adapter configured for a feature.

    The provider name is read from the system parameter
    ``fusion_accounting.provider.<feature_name>``, falling back to
    ``fusion_accounting.provider.default``.

    Returns:
        An adapter instance, or None when no provider is configured, the
        adapter modules cannot be imported, or the configured name starts
        with neither "openai" nor "claude" (statistical-only mode).
    """
    config = env['ir.config_parameter'].sudo()
    provider_name = (
        config.get_param(f'fusion_accounting.provider.{feature_name}')
        or config.get_param('fusion_accounting.provider.default')
    )
    if not provider_name:
        return None

    # Imported lazily so this module still loads without the AI add-on.
    try:
        from odoo.addons.fusion_accounting_ai.services.adapters.openai_adapter import OpenAIAdapter
        from odoo.addons.fusion_accounting_ai.services.adapters.claude import ClaudeAdapter
    except ImportError:
        _logger.warning("fusion_accounting_ai adapters not importable")
        return None

    for prefix, adapter_cls in (('openai', OpenAIAdapter), ('claude', ClaudeAdapter)):
        if provider_name.startswith(prefix):
            return adapter_cls(env)
    return None
def _ai_rerank(env, provider, statement_line, scored, pattern, precedents):
    """Ask the LLM to re-rank the top candidates and blend in its verdict.

    Args:
        env: Odoo env (not used directly here).
        provider: adapter exposing ``complete(system=, messages=, max_tokens=,
            temperature=)`` and returning a mapping with a ``'content'`` key.
        statement_line: bank statement line being reconciled.
        scored: list of ScoredCandidate; updated in place.
        pattern: partner pattern record or None (forwarded to the prompt).
        precedents: precedent matches (forwarded to the prompt).

    Returns:
        ``scored`` sorted by blended confidence (40% statistical, 60% AI).
        ``scored`` is returned untouched when the prompt module is missing
        or the response is not usable JSON.

    Errors raised by the prompt builder or the provider call propagate;
    the caller is expected to catch them and keep the statistical ranking.
    """
    try:
        from odoo.addons.fusion_accounting_ai.services.prompts.bank_rec_prompt import build_prompt
    except ImportError:
        _logger.debug("bank_rec_prompt not yet available; skipping AI re-rank")
        return scored

    system, user = build_prompt(statement_line, scored, pattern, precedents)
    response = provider.complete(
        system=system,
        messages=[{'role': 'user', 'content': user}],
        max_tokens=800,
        temperature=0.0,
    )
    try:
        parsed = json.loads(response['content'])
    except (json.JSONDecodeError, KeyError, TypeError):
        return scored
    return _apply_ai_ranking(scored, parsed)


def _apply_ai_ranking(scored, parsed):
    """Blend a parsed LLM ranking into ``scored`` and re-sort it.

    Every piece of the payload is validated before a candidate is touched,
    so a malformed entry is skipped instead of raising half-way through and
    leaving candidates partially updated.
    """
    ranked = parsed.get('ranked') if isinstance(parsed, dict) else None
    if not isinstance(ranked, list):
        return scored

    ai_by_id = {}
    for item in ranked:
        if not isinstance(item, dict):
            continue
        try:
            # JSON producers often emit ids as strings; normalise to int.
            ai_by_id[int(item['candidate_id'])] = item
        except (KeyError, TypeError, ValueError):
            continue

    for s in scored:
        item = ai_by_id.get(s.candidate_id)
        if item is None:
            continue
        ai_confidence = _unit_interval(item.get('confidence'), s.confidence)
        reason = item.get('reason')
        s.score_ai_rerank = round(ai_confidence, 3)
        if isinstance(reason, str) and reason.strip():
            s.reasoning = reason
        s.confidence = round((s.confidence * 0.4) + (ai_confidence * 0.6), 3)

    scored.sort(key=lambda x: -x.confidence)
    return scored


def _unit_interval(value, fallback):
    """Return ``value`` as a float clamped to [0, 1], or ``fallback`` if not numeric."""
    if isinstance(value, bool):
        return fallback
    try:
        number = float(value)
    except (TypeError, ValueError):
        return fallback
    if number != number:  # NaN never compares equal to itself
        return fallback
    return min(max(number, 0.0), 1.0)

View File

@@ -0,0 +1,46 @@
"""Exchange-difference calculation helper.
Pure-Python FX gain/loss computation. The engine uses this for rapid
pre-checks; Odoo's account.move._create_exchange_difference_move() is
invoked separately for the actual GL posting.
"""
from dataclasses import dataclass
@dataclass
class ExchangeDiffResult:
    """Result of an FX pre-check between a bank line and its counterpart."""

    # True when the difference exceeds the rounding tolerance.
    needs_diff_move: bool
    # In company currency; positive = gain, negative = loss.
    diff_amount: float
    # Bank line amount converted to company currency.
    line_company_amount: float
    # Matched journal item amount converted to company currency.
    against_company_amount: float
def compute_exchange_diff(*, line_amount, line_currency_code, against_amount,
                          against_currency_code, line_rate, against_rate) -> ExchangeDiffResult:
    """Compare a bank line and its matched journal item in company currency.

    Each amount is converted by multiplying it with its rate, so a rate is
    used as "company-currency units per one unit of the amount's currency".
    NOTE(review): rates quoted the other way round (foreign units per
    company unit) would have to be inverted before the call — confirm what
    callers pass.

    Args:
        line_amount: Bank line amount in its currency
        line_currency_code: e.g. 'USD' (not used in the calculation)
        against_amount: Matched journal item amount in its currency
        against_currency_code: e.g. 'USD' (not used in the calculation)
        line_rate: multiplier applied to ``line_amount`` (rate at line date)
        against_rate: multiplier applied to ``against_amount`` (rate at
            journal item posting date)

    Returns:
        ExchangeDiffResult whose amounts are rounded to 2 decimals.
        ``needs_diff_move`` is decided on the unrounded difference, with a
        tolerance of half a cent.
    """
    line_in_company = line_amount * line_rate
    against_in_company = against_amount * against_rate
    gap = line_in_company - against_in_company
    return ExchangeDiffResult(
        needs_diff_move=abs(gap) > 0.005,  # rounding tolerance
        diff_amount=round(gap, 2),
        line_company_amount=round(line_in_company, 2),
        against_company_amount=round(against_in_company, 2),
    )

View File

@@ -0,0 +1,91 @@
"""Matching strategy classes for the reconcile engine.
Each strategy takes a bank amount + list of candidate journal items
and returns a MatchResult with the picked ids + confidence + residual.
Strategies are pure Python; no ORM dependency.
"""
from dataclasses import dataclass, field
from itertools import combinations
@dataclass
class Candidate:
    """An open journal item that may be matched against a bank line."""

    # Identifier reported back in MatchResult.picked_ids.
    id: int
    # Amount compared against the bank amount.
    amount: float
    partner_id: int
    # Larger means older; used for oldest-first ordering.
    age_days: int
@dataclass
class MatchResult:
    """Outcome of a matching strategy; the defaults describe "no match"."""

    # Ids of the candidates selected by the strategy.
    picked_ids: list[int] = field(default_factory=list)
    confidence: float = 0.0
    # bank_amount - sum(picked); positive = under-allocated.
    residual: float = 0.0
    strategy_name: str = ""
# Currency rounding tolerance: amounts closer than this (half a cent) are
# treated as equal by the strategies in this module.
AMOUNT_TOLERANCE = 0.005
class AmountExactStrategy:
    """Select the single candidate whose amount equals the bank amount.

    When several candidates match within AMOUNT_TOLERANCE, the one with
    the largest ``age_days`` wins (FIFO tiebreaker); among equally old
    candidates the first one in the input order is kept.
    """

    def match(self, *, bank_amount: float, candidates: list[Candidate]) -> MatchResult:
        """Return a confidence-1.0 single-pick result, or an empty result."""
        oldest = None
        for cand in candidates:
            if not abs(cand.amount - bank_amount) < AMOUNT_TOLERANCE:
                continue
            # Strict comparison keeps the first of several equally old matches.
            if oldest is None or cand.age_days > oldest.age_days:
                oldest = cand

        if oldest is None:
            return MatchResult(strategy_name='amount_exact')
        return MatchResult(
            picked_ids=[oldest.id],
            confidence=1.0,
            residual=0.0,
            strategy_name='amount_exact',
        )
class FIFOStrategy:
    """Pick oldest candidates first until the bank amount is exhausted.

    The last picked candidate may exceed what is left of the bank amount;
    the resulting negative residual signals a partial reconcile of that
    candidate.
    """

    def match(self, *, bank_amount: float, candidates: list[Candidate]) -> MatchResult:
        """Allocate ``bank_amount`` across candidates, oldest first.

        Returns:
            MatchResult with confidence 0.7 when the bank amount was fully
            allocated and 0.5 when the candidates ran out first. When
            nothing is picked (no candidates, or a bank amount at or below
            the tolerance) the confidence is 0.0, so an empty pick list is
            never reported as a match.
        """
        if not candidates:
            return MatchResult(strategy_name='fifo')

        picked = []
        remaining = bank_amount
        for cand in sorted(candidates, key=lambda c: c.age_days, reverse=True):
            if remaining <= AMOUNT_TOLERANCE:
                break
            picked.append(cand.id)
            remaining -= cand.amount

        if not picked:
            # Nothing to allocate: report "no match" rather than 0.7.
            return MatchResult(residual=remaining, strategy_name='fifo')

        # Same comparison as the loop's stop condition, so "exhausted" means
        # the same thing in both places.
        fully_allocated = remaining <= AMOUNT_TOLERANCE
        return MatchResult(
            picked_ids=picked,
            confidence=0.7 if fully_allocated else 0.5,
            residual=remaining,
            strategy_name='fifo',
        )
class MultiInvoiceStrategy:
    """Find the smallest group of candidates whose amounts sum to the bank amount.

    Group sizes from 2 up to ``max_combinations`` are tried in increasing
    order; the first group found within AMOUNT_TOLERANCE is returned.
    """

    def __init__(self, max_combinations=3):
        # Largest group size that will be tried.
        self.max_combinations = max_combinations

    def match(self, *, bank_amount: float, candidates: list[Candidate]) -> MatchResult:
        """Return the first matching group (confidence 0.85) or an empty result."""
        for size in range(2, self.max_combinations + 1):
            hit = next(
                (
                    group
                    for group in combinations(candidates, size)
                    if abs(sum(c.amount for c in group) - bank_amount) < AMOUNT_TOLERANCE
                ),
                None,
            )
            if hit is not None:
                return MatchResult(
                    picked_ids=[c.id for c in hit],
                    confidence=0.85,
                    residual=0.0,
                    strategy_name=f'multi_invoice_{size}',
                )
        return MatchResult(strategy_name='multi_invoice')

View File

@@ -0,0 +1,44 @@
"""Extract searchable tokens from Canadian bank statement memos.
Handles common memo formats from RBC, TD, Scotia, BMO, plus generic
cheque-number and reference-number patterns. Output is normalized
(uppercase, alphanumeric) for case-insensitive matching.
"""
import re
# (compiled pattern, replacement template) pairs. Each one collapses a
# keyword followed by a number into a single token, e.g. "CHQ #42" ->
# "CHEQUE42", so the number stays attached to what it refers to.
REF_PATTERNS = [
    # REF / REFERENCE
    (re.compile(r'\b(REF|REFERENCE)\s*#?\s*(\d+)\b', re.I), r'REF\2'),
    # CHQ / CHEQUE / CHECK
    (re.compile(r'\b(CHQ|CHEQUE|CHECK)\s*#?\s*(\d+)\b', re.I), r'CHEQUE\2'),
    # INV / INVOICE
    (re.compile(r'\b(INV|INVOICE)\s*#?\s*(\d+)\b', re.I), r'INV\2'),
]
# Tokens shorter than this many characters are discarded by tokenize_memo.
MIN_TOKEN_LENGTH = 2
def tokenize_memo(memo: str | None) -> list[str]:
"""Return list of normalized tokens from a bank memo.
Empty/None input returns []. Order preserved (first occurrence wins
for de-duplication)."""
if not memo:
return []
text = memo.upper()
for pattern, replacement in REF_PATTERNS:
text = pattern.sub(replacement, text)
text = re.sub(r'[^A-Z0-9]+', ' ', text)
raw_tokens = text.split()
seen = set()
tokens = []
for tok in raw_tokens:
if len(tok) < MIN_TOKEN_LENGTH:
continue
if tok in seen:
continue
seen.add(tok)
tokens.append(tok)
return tokens

View File

@@ -0,0 +1,74 @@
"""Aggregate per-partner reconciliation patterns from precedent rows.
Computes typical amount range, cadence, preferred strategy, common memo
tokens. Output is a dict suitable for create/write on fusion.reconcile.pattern.
"""
from collections import Counter
from statistics import median
def extract_pattern_for_partner(env, *, company_id, partner_id) -> dict:
    """Aggregate the reconciliation pattern of one (company, partner) pair.

    Reads at most the 200 most recently reconciled precedents and derives
    the amount range, average days between reconciliations, preferred
    strategy and frequently seen memo tokens.

    Returns:
        vals dict suitable for env['fusion.reconcile.pattern'].create().
        Without precedents only the ids and ``reconcile_count: 0`` are set.
    """
    history = env['fusion.reconcile.precedent'].sudo().search(
        [
            ('company_id', '=', company_id),
            ('partner_id', '=', partner_id),
        ],
        order='reconciled_at desc',
        limit=200,
    )
    vals = {'company_id': company_id, 'partner_id': partner_id}
    if not history:
        vals['reconcile_count'] = 0
        return vals

    amounts = sorted(history.mapped('amount'))
    line_counts = history.mapped('matched_move_line_count')
    singles = sum(1 for n in line_counts if n == 1)
    multis = sum(1 for n in line_counts if n > 1)
    if multis > singles:
        pref_strategy = 'multi_invoice'
    else:
        pref_strategy = 'exact_amount' if _amounts_concentrated(amounts) else 'fifo'

    # Average gap, in whole days, between consecutive reconciliations.
    stamps = sorted(p.reconciled_at for p in history if p.reconciled_at)
    gaps = [(later - earlier).days for earlier, later in zip(stamps, stamps[1:])]
    cadence = sum(gaps) / len(gaps) if gaps else 0.0

    token_counter = Counter(
        tok.strip()
        for p in history if p.memo_tokens
        for tok in p.memo_tokens.split(',')
    )
    # Keep tokens appearing in >=30% of precedents (min floor of 2 occurrences)
    threshold = max(2, len(history) * 0.3)
    common_tokens = ','.join(t for t, c in token_counter.most_common() if c >= threshold)

    vals.update({
        'reconcile_count': len(history),
        'typical_amount_range': f"${min(amounts):,.2f} ${max(amounts):,.2f} (median ${median(amounts):,.2f})",
        'typical_cadence_days': round(cadence, 1),
        'pref_strategy': pref_strategy,
        'common_memo_tokens': common_tokens,
    })
    return vals
def _amounts_concentrated(amounts: list[float]) -> bool:
    """Tell whether amounts cluster around their median.

    Returns True when at least 60% of the amounts lie within 5% of the
    median, which suggests the exact-amount strategy. Fewer than three
    amounts are always considered concentrated.

    The 5% band is relative to the magnitude of the median (with a floor of
    1 to avoid dividing by tiny values), so signed amounts such as refunds
    are judged the same way as positive ones.
    """
    if len(amounts) < 3:
        return True
    med = median(amounts)
    # abs(): a negative median must not collapse the scale to the floor of 1.
    scale = max(abs(med), 1)
    close = sum(1 for a in amounts if abs(a - med) / scale < 0.05)
    return close / len(amounts) >= 0.6

View File

@@ -0,0 +1,116 @@
"""Pure-Python helpers for backfilling fusion.reconcile.precedent
from existing account.partial.reconcile rows during migration.
Strategy:
- Each account.partial.reconcile that involves at least one
account.bank.statement.line's reconcile-account line is a candidate.
- One precedent per qualifying partial. The counterpart account id is stored
in matched_account_ids and, together with partner, amount and other
precedent fields, lets a second run detect and skip already-backfilled rows
(idempotency).
"""
import logging
from .memo_tokenizer import tokenize_memo
_logger = logging.getLogger(__name__)
def _identify_bank_side(partial):
    """Work out which side of a partial reconcile belongs to a bank statement line.

    The debit side is checked first, then the credit side.

    Returns:
        (bank_move_line, counterpart_move_line, statement_line_id), or
        (None, None, None) if neither side is a bank statement line.
    """
    debit = partial.debit_move_id
    credit = partial.credit_move_id
    for bank_line, other_line in ((debit, credit), (credit, debit)):
        statement_line = bank_line.move_id.statement_line_id
        if statement_line:
            return bank_line, other_line, statement_line.id
    return None, None, None
def backfill_precedents(env, *, company_id=None, batch_size=500, limit=10000):
    """Create fusion.reconcile.precedent rows from historical bank reconciliations.

    Walks account.partial.reconcile rows (lowest id first) that touch a bank
    statement line on either side and creates one precedent per partial.

    Idempotent: a partial is skipped when a 'backfill' precedent with the
    same partner, amount, counterpart account, journal, statement date and
    reconcile timestamp already exists. Matching on date/journal/timestamp
    keeps recurring payments of the same amount as separate precedents.

    Args:
        env: Odoo env.
        company_id: restrict the walk to one company when given.
        batch_size: commit and log progress every this many created rows
            (no intermediate commits when falsy or not positive; commits
            are also suppressed under the test cursor).
        limit: maximum number of partials examined. NOTE(review): the walk
            always starts from the lowest id, so partials beyond ``limit``
            are not reached by re-running with the same limit.

    Returns dict with `created` and `skipped` counts.
    """
    from datetime import timedelta  # only needed for the dedup time window

    Precedent = env['fusion.reconcile.precedent'].sudo()
    Partial = env['account.partial.reconcile'].sudo()
    Line = env['account.bank.statement.line'].sudo()
    in_test_mode = env.cr.__class__.__name__ == 'TestCursor'

    # Pre-filter to partials that touch a bank statement line on either side.
    # In a real DB we typically have 10x more invoice<->payment partials than
    # bank-rec partials; filtering here keeps the loop bounded and makes the
    # default limit reflect "real" candidates rather than every partial ever.
    domain = [
        '|',
        ('debit_move_id.move_id.statement_line_id', '!=', False),
        ('credit_move_id.move_id.statement_line_id', '!=', False),
    ]
    if company_id:
        domain.append(('company_id', '=', company_id))
    partials = Partial.search(domain, limit=limit, order='id asc')

    created = 0
    skipped = 0
    for partial in partials:
        bank_line, counterpart, bsl_id = _identify_bank_side(partial)
        if not bsl_id:
            skipped += 1
            continue

        statement_line = Line.browse(bsl_id)
        partner_id = counterpart.partner_id.id if counterpart.partner_id else False
        amount = abs(partial.amount)
        signature_account = str(counterpart.account_id.id)
        reconciled_at = partial.create_date
        precedent_date = statement_line.date or (
            reconciled_at.date() if reconciled_at else False)

        # Exact match on the stored account id: a '%id%' pattern would also
        # match longer ids that merely contain these digits.
        dedup_domain = [
            ('partner_id', '=', partner_id),
            ('amount', '=', amount),
            ('matched_account_ids', '=', signature_account),
            ('journal_id', '=', statement_line.journal_id.id),
            ('date', '=', precedent_date),
            ('source', '=', 'backfill'),
        ]
        if reconciled_at:
            # One-second window so the check works whether or not the stored
            # timestamp kept its microseconds.
            window_start = reconciled_at.replace(microsecond=0)
            dedup_domain += [
                ('reconciled_at', '>=', window_start),
                ('reconciled_at', '<', window_start + timedelta(seconds=1)),
            ]
        if Precedent.search(dedup_domain, limit=1):
            skipped += 1
            continue

        try:
            currency = (partial.debit_currency_id
                        or partial.company_id.currency_id)
            # Savepoint: a failed INSERT must not leave the transaction in
            # an aborted state for the remaining partials.
            with env.cr.savepoint():
                Precedent.create({
                    'company_id': partial.company_id.id,
                    'partner_id': partner_id,
                    'amount': amount,
                    'currency_id': currency.id,
                    'date': precedent_date,
                    'memo_tokens': ','.join(
                        tokenize_memo(statement_line.payment_ref or '')),
                    'journal_id': statement_line.journal_id.id,
                    'matched_move_line_count': 1,
                    'matched_account_ids': signature_account,
                    'reconciler_user_id': partial.create_uid.id,
                    'reconciled_at': reconciled_at,
                    'source': 'backfill',
                })
        except Exception as e:  # noqa: BLE001
            _logger.warning("Backfill skip partial %s: %s", partial.id, e)
            skipped += 1
            continue

        # Counting and committing happen outside the try block so a row is
        # never reported as both created and skipped.
        created += 1
        if batch_size and batch_size > 0 and created % batch_size == 0:
            if not in_test_mode:
                env.cr.commit()
            _logger.info(
                "Backfill progress: %d created, %d skipped",
                created, skipped)

    _logger.info(
        "precedent_backfill complete: %d created, %d skipped",
        created, skipped)
    return {'created': created, 'skipped': skipped}

View File

@@ -0,0 +1,62 @@
"""K-nearest precedent search.
Given a new bank line, find the most similar past reconciliations for
ranking + confidence scoring. Distance metric: amount delta (primary),
date recency (secondary), memo token overlap (tertiary).
"""
from dataclasses import dataclass
@dataclass
class PrecedentMatch:
    """A past reconciliation together with its similarity to a new bank line."""

    precedent_id: int
    amount: float
    # Comma-separated tokens as stored on the precedent ('' when none).
    memo_tokens: str
    matched_move_line_count: int
    # 70% amount closeness + 30% memo overlap.
    similarity_score: float
# Precedents whose amount lies within this fraction (1%) of the searched
# amount count as "near".
AMOUNT_TOLERANCE_PCT = 0.01
def find_nearest_precedents(env, *, partner_id, amount, k=5, memo_tokens=None):
    """Return up to ``k`` precedents of a partner most similar to ``amount``.

    The search keeps the partner's precedents whose amount is within
    AMOUNT_TOLERANCE_PCT of ``amount`` (at least +/- 1.00), newest first and
    capped at ``4 * k`` rows. Each row is then scored as 70% amount
    closeness plus 30% memo-token overlap; the overlap part is a neutral
    0.5 when no ``memo_tokens`` are supplied.

    Returns:
        list of PrecedentMatch sorted by similarity, best first.
    """
    Precedent = env['fusion.reconcile.precedent'].sudo()
    tolerance = max(amount * AMOUNT_TOLERANCE_PCT, 1.00)
    low, high = amount - tolerance, amount + tolerance
    rows = Precedent.search(
        [
            ('partner_id', '=', partner_id),
            ('amount', '>=', low),
            ('amount', '<=', high),
        ],
        limit=k * 4,
        order='reconciled_at desc',
    )

    def _similarity(row):
        closeness = 1.0 - min(abs(row.amount - amount) / max(amount, 1), 1.0)
        overlap = _memo_overlap(row.memo_tokens, memo_tokens) if memo_tokens else 0.5
        return (closeness * 0.7) + (overlap * 0.3)

    matches = [
        PrecedentMatch(
            precedent_id=row.id,
            amount=row.amount,
            memo_tokens=row.memo_tokens or '',
            matched_move_line_count=row.matched_move_line_count,
            similarity_score=_similarity(row),
        )
        for row in rows
    ]
    return sorted(matches, key=lambda m: m.similarity_score, reverse=True)[:k]
def _memo_overlap(precedent_tokens_str, new_tokens) -> float:
    """Jaccard similarity between stored precedent tokens and new memo tokens.

    Args:
        precedent_tokens_str: comma-separated tokens as stored on a precedent.
        new_tokens: iterable of tokens (list or set), or a comma-separated
            string in the same storage format.

    Returns:
        |intersection| / |union| in [0, 1]; 0.0 when either side is empty.

    Tokens are stripped of surrounding whitespace and empty tokens are
    ignored, so "A, B" and "A,,B" compare equal to ["A", "B"].
    """
    if not precedent_tokens_str or not new_tokens:
        return 0.0
    if isinstance(new_tokens, str):
        # Treat a string as the comma-joined form, not as single characters.
        new_tokens = new_tokens.split(',')

    precedent_set = {tok.strip() for tok in precedent_tokens_str.split(',')} - {''}
    new_set = {tok.strip() if isinstance(tok, str) else tok for tok in new_tokens} - {''}
    union = precedent_set | new_set
    if not union:
        return 0.0
    return len(precedent_set & new_set) / len(union)