changes

2026-05-16 13:18:52 -04:00
parent 191a9c82be
commit 9ebf89bde2
1080 changed files with 0 additions and 1197 deletions
--- a/fusion_accounting/fusion_accounting_bank_rec/services/__init__.py
+++ b/fusion_accounting/fusion_accounting_bank_rec/services/__init__.py
@@ -0,0 +1,7 @@
+from . import memo_tokenizer
+from . import exchange_diff
+from . import matching_strategies
+from . import precedent_lookup
+from . import pattern_extractor
+from . import confidence_scoring
+from . import precedent_backfill
--- a/fusion_accounting/fusion_accounting_bank_rec/services/confidence_scoring.py
+++ b/fusion_accounting/fusion_accounting_bank_rec/services/confidence_scoring.py
@@ -0,0 +1,178 @@
+"""4-pass confidence scoring pipeline.
+
+Pass 1: SQL filter — partner match + reconcilable account (done by caller — engine._fetch_candidates)
+Pass 2: Statistical scoring — amount delta + pattern match + precedent similarity
+Pass 3: AI re-rank (if provider configured) — feed top 5 to LLM, parse JSON ranking
+Pass 4: Persist as fusion.reconcile.suggestion rows (done by caller — engine.suggest_matches)
+"""
+
+import json
+import logging
+from dataclasses import dataclass
+
+from .matching_strategies import Candidate
+from .precedent_lookup import find_nearest_precedents
+from .memo_tokenizer import tokenize_memo
+
+_logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ScoredCandidate:
+    """One candidate journal item with its blended confidence and per-signal scores.
+
+    Instances are built by score_candidates(); _ai_rerank() may later overwrite
+    ``confidence``, ``reasoning`` and ``score_ai_rerank`` in place.
+    """
+    candidate_id: int  # id of the Candidate this score belongs to
+    confidence: float  # blended score, rounded to 3 decimals
+    reasoning: str  # short human-readable explanation of the score
+    score_amount_match: float  # closeness of the candidate amount to the bank amount
+    score_partner_pattern: float  # fit with the partner's stored reconcile pattern
+    score_precedent_similarity: float  # similarity to past reconciliations
+    score_ai_rerank: float = 0.0  # confidence reported by the AI re-rank; 0.0 when not re-ranked
+
+
+def score_candidates(env, *, statement_line, candidates, k=5, use_ai=True):
+    """Rank candidate journal items for one bank statement line.
+
+    Each candidate gets a blended confidence made of the amount closeness
+    (weight 0.5), the partner-pattern fit (0.25) and the precedent similarity
+    (0.25). The best ``k`` are kept and, when ``use_ai`` is set and a provider
+    is configured, handed to the AI re-rank step.
+
+    Args:
+        env: Odoo env
+        statement_line: account.bank.statement.line recordset (singleton)
+        candidates: list of Candidate dataclasses (from matching_strategies)
+        k: max number of scored candidates to return
+        use_ai: if True AND a provider is configured, invoke AI re-rank
+
+    Returns:
+        list of ScoredCandidate sorted by confidence desc, max length k.
+        Empty list when there are no candidates or no statement line.
+    """
+    if not (candidates and statement_line):
+        return []
+
+    partner = statement_line.partner_id
+    partner_id = partner.id if partner else None
+    bank_amount = abs(statement_line.amount)
+    memo_tokens = tokenize_memo(statement_line.payment_ref)
+
+    # Partner-specific signals only exist when the line carries a partner.
+    pattern, precedents = None, []
+    if partner_id:
+        found = env['fusion.reconcile.pattern'].sudo().search(
+            [('partner_id', '=', partner_id)], limit=1)
+        pattern = found or None
+        precedents = find_nearest_precedents(
+            env, partner_id=partner_id, amount=bank_amount, k=5, memo_tokens=memo_tokens)
+
+    ranked = []
+    for cand in candidates:
+        # Relative amount gap, capped at 1 so the score stays within [0, 1].
+        gap_ratio = abs(cand.amount - bank_amount) / max(bank_amount, 1)
+        amount_score = 1.0 - min(gap_ratio, 1.0)
+        pattern_score = _pattern_score(cand, pattern, bank_amount)
+        precedent_score = _precedent_score(cand, precedents)
+        blended = (amount_score * 0.5) + (pattern_score * 0.25) + (precedent_score * 0.25)
+        ranked.append(ScoredCandidate(
+            candidate_id=cand.id,
+            confidence=round(blended, 3),
+            reasoning=_build_reasoning(amount_score, pattern_score, precedent_score, pattern),
+            score_amount_match=round(amount_score, 3),
+            score_partner_pattern=round(pattern_score, 3),
+            score_precedent_similarity=round(precedent_score, 3),
+        ))
+
+    top_k = sorted(ranked, key=lambda s: s.confidence, reverse=True)[:k]
+
+    if not use_ai:
+        return top_k
+    provider = _get_provider(env, 'bank_rec_suggest')
+    if provider is None:
+        return top_k
+    try:
+        return _ai_rerank(env, provider, statement_line, top_k, pattern, precedents)
+    except Exception as e:
+        # The AI step is optional: fall back to the statistical ranking.
+        _logger.warning("AI re-rank failed, using statistical scoring: %s", e)
+        return top_k
+
+
+def _pattern_score(cand, pattern, bank_amount) -> float:
+    """Score how well a candidate fits the partner's stored pattern.
+
+    Returns 1.0 only when the pattern prefers 'exact_amount' and the candidate
+    amount is within 0.005 of the bank amount; every other case (including no
+    pattern at all) yields the neutral 0.5.
+    """
+    if pattern and pattern.pref_strategy == 'exact_amount':
+        if abs(cand.amount - bank_amount) < 0.005:
+            return 1.0
+    return 0.5
+
+
+def _precedent_score(cand, precedents) -> float:
+    """Return the best similarity among the precedents, or 0.5 when there are none.
+
+    ``cand`` is accepted but not used, so every candidate of the same
+    statement line receives the same precedent score.
+    """
+    if precedents:
+        return max((p.similarity_score for p in precedents), default=0.5)
+    return 0.5
+
+
+def _build_reasoning(amount_score, pattern_score, precedent_score, pattern) -> str:
+    """Build the short explanation shown next to a suggestion.
+
+    Collects one phrase per strong signal (amount, partner pattern with more
+    than 5 reconciles, precedent similarity >= 0.8) and joins them with " · ".
+    Returns "Weak signal" when no signal qualifies. ``pattern_score`` is
+    accepted but not used.
+    """
+    if amount_score >= 0.99:
+        amount_note = "Exact amount match"
+    elif amount_score >= 0.95:
+        amount_note = "Amount close"
+    else:
+        amount_note = None
+
+    pattern_note = None
+    if pattern and pattern.reconcile_count > 5:
+        pattern_note = f"Matches partner's {pattern.reconcile_count}-reconcile pattern"
+
+    precedent_note = "Strong precedent match" if precedent_score >= 0.8 else None
+
+    notes = [note for note in (amount_note, pattern_note, precedent_note) if note]
+    return " · ".join(notes) or "Weak signal"
+
+
+def _get_provider(env, feature_name):
+    """Instantiate the AI adapter configured for ``feature_name``.
+
+    The provider name is read from the system parameter
+    ``fusion_accounting.provider.<feature_name>``, falling back to
+    ``fusion_accounting.provider.default``.
+
+    Returns None (statistical-only mode) when no provider is configured, when
+    the fusion_accounting_ai adapters cannot be imported, or when the name
+    starts with neither 'openai' nor 'claude'.
+    """
+    config = env['ir.config_parameter'].sudo()
+    provider_name = (
+        config.get_param(f'fusion_accounting.provider.{feature_name}')
+        or config.get_param('fusion_accounting.provider.default')
+    )
+    if not provider_name:
+        return None
+
+    # Imported lazily so this module still loads without the AI add-on.
+    try:
+        from odoo.addons.fusion_accounting_ai.services.adapters.openai_adapter import OpenAIAdapter
+        from odoo.addons.fusion_accounting_ai.services.adapters.claude import ClaudeAdapter
+    except ImportError:
+        _logger.warning("fusion_accounting_ai adapters not importable")
+        return None
+
+    for prefix, adapter_cls in (('openai', OpenAIAdapter), ('claude', ClaudeAdapter)):
+        if provider_name.startswith(prefix):
+            return adapter_cls(env)
+    return None
+
+
+def _ai_rerank(env, provider, statement_line, scored, pattern, precedents):
+    """Send the top-K candidates + features to the LLM and blend in its ranking.
+
+    The provider is expected to answer with JSON of the form
+    ``{"ranked": [{"candidate_id": ..., "confidence": ..., "reason": ...}]}``.
+    For every candidate the AI mentions, the final confidence becomes
+    ``0.4 * statistical + 0.6 * ai`` (rounded to 3 decimals) and the AI reason,
+    when it is a non-empty string, replaces the statistical reasoning.
+
+    ``scored`` is returned unchanged when the prompt builder is unavailable or
+    the response is not usable JSON of that shape. Individual malformed items
+    are ignored, so a bad entry can never leave ``scored`` half-updated.
+    Errors raised by ``provider.complete`` propagate to the caller.
+
+    Args:
+        env: Odoo env (unused here, kept for interface compatibility)
+        provider: adapter exposing ``complete(system=, messages=, max_tokens=, temperature=)``
+        statement_line: the bank statement line being reconciled
+        scored: list of ScoredCandidate; updated and re-sorted in place
+        pattern: partner pattern record or None
+        precedents: list of precedent matches
+
+    Returns:
+        The same ``scored`` list, sorted by confidence descending.
+    """
+    try:
+        from odoo.addons.fusion_accounting_ai.services.prompts.bank_rec_prompt import build_prompt
+    except ImportError:
+        _logger.debug("bank_rec_prompt not yet available; skipping AI re-rank")
+        return scored
+
+    system, user = build_prompt(statement_line, scored, pattern, precedents)
+    response = provider.complete(
+        system=system,
+        messages=[{'role': 'user', 'content': user}],
+        max_tokens=800,
+        temperature=0.0,
+    )
+
+    try:
+        parsed = json.loads(response['content'])
+    except (json.JSONDecodeError, KeyError, TypeError):
+        return scored
+
+    # LLM output is untrusted: insist on the documented shape before using it.
+    ranked = parsed.get('ranked') if isinstance(parsed, dict) else None
+    if not isinstance(ranked, list):
+        return scored
+
+    ai_items = {}
+    for item in ranked:
+        if not isinstance(item, dict):
+            continue
+        try:
+            # Models frequently echo ids back as strings ("42").
+            ai_items[int(item['candidate_id'])] = item
+        except (KeyError, TypeError, ValueError):
+            continue
+
+    for s in scored:
+        item = ai_items.get(s.candidate_id)
+        if item is None:
+            continue
+        try:
+            ai_confidence = float(item.get('confidence', s.confidence))
+        except (TypeError, ValueError):
+            continue
+        if ai_confidence != ai_confidence:  # NaN is valid JSON for json.loads
+            continue
+        # Keep the AI score on the same [0, 1] scale as the statistical scores.
+        ai_confidence = min(max(ai_confidence, 0.0), 1.0)
+
+        reason = item.get('reason')
+        if isinstance(reason, str) and reason:
+            s.reasoning = reason
+        s.score_ai_rerank = ai_confidence
+        s.confidence = round((s.confidence * 0.4) + (ai_confidence * 0.6), 3)
+
+    scored.sort(key=lambda x: -x.confidence)
+    return scored
--- a/fusion_accounting/fusion_accounting_bank_rec/services/exchange_diff.py
+++ b/fusion_accounting/fusion_accounting_bank_rec/services/exchange_diff.py
@@ -0,0 +1,46 @@
+"""Exchange-difference calculation helper.
+
+Pure-Python FX gain/loss computation. The engine uses this for rapid
+pre-checks; Odoo's account.move._create_exchange_difference_move() is
+invoked separately for the actual GL posting.
+"""
+
+from dataclasses import dataclass
+
+
+@dataclass
+class ExchangeDiffResult:
+    """Outcome of compute_exchange_diff(); all amounts are rounded to 2 decimals."""
+    needs_diff_move: bool        # True when the unrounded |diff| exceeds 0.005
+    diff_amount: float           # in company currency; positive = gain, negative = loss
+    line_company_amount: float   # line_amount * line_rate
+    against_company_amount: float  # against_amount * against_rate
+
+
+def compute_exchange_diff(*, line_amount, line_currency_code, against_amount,
+                            against_currency_code, line_rate, against_rate) -> ExchangeDiffResult:
+    """Compute whether an exchange-diff move is needed and its magnitude.
+
+    Both amounts are converted by MULTIPLYING with their rate, and the
+    difference ``line - against`` is reported.
+
+    NOTE(review): because the rates are used as multipliers they must be
+    expressed as company currency per unit of foreign currency. An earlier
+    version of this docstring described them as "foreign per company
+    currency", which would call for a division — confirm against the callers.
+
+    Args:
+        line_amount: Bank line amount in its currency
+        line_currency_code: e.g. 'USD' (not used by the computation)
+        against_amount: Matched journal item amount in its currency
+        against_currency_code: e.g. 'USD' (not used by the computation)
+        line_rate: conversion multiplier applied to line_amount (rate at line date)
+        against_rate: conversion multiplier applied to against_amount
+            (rate at journal item posting date)
+
+    Returns:
+        ExchangeDiffResult with needs_diff_move flag and computed diff
+        in company currency (positive = gain, negative = loss).
+    """
+    converted_line = line_amount * line_rate
+    converted_against = against_amount * against_rate
+    delta = converted_line - converted_against
+
+    return ExchangeDiffResult(
+        # Differences of half a cent or less are treated as rounding noise.
+        needs_diff_move=abs(delta) > 0.005,
+        diff_amount=round(delta, 2),
+        line_company_amount=round(converted_line, 2),
+        against_company_amount=round(converted_against, 2),
+    )
--- a/fusion_accounting/fusion_accounting_bank_rec/services/matching_strategies.py
+++ b/fusion_accounting/fusion_accounting_bank_rec/services/matching_strategies.py
@@ -0,0 +1,91 @@
+"""Matching strategy classes for the reconcile engine.
+
+Each strategy takes a bank amount + list of candidate journal items
+and returns a MatchResult with the picked ids + confidence + residual.
+Strategies are pure Python; no ORM dependency.
+"""
+
+from dataclasses import dataclass, field
+from itertools import combinations
+
+
+@dataclass
+class Candidate:
+    """A journal item that could be matched against a bank statement line."""
+    id: int  # identifier returned in MatchResult.picked_ids
+    amount: float  # amount compared with / subtracted from the bank amount
+    partner_id: int  # partner of the journal item (not read by the strategies in this module)
+    age_days: int  # larger = older; the strategies prefer older items first
+
+
+@dataclass
+class MatchResult:
+    """Result of a matching strategy; the defaults represent "no match"."""
+    picked_ids: list[int] = field(default_factory=list)  # Candidate.id values chosen by the strategy
+    confidence: float = 0.0  # strategy-specific confidence; 0.0 when nothing matched
+    residual: float = 0.0  # bank_amount - sum(picked); positive = under-allocated
+    strategy_name: str = ""  # name of the strategy that produced this result
+
+
+# Absolute difference below which two amounts are treated as equal by the
+# strategies in this module.
+AMOUNT_TOLERANCE = 0.005  # currency rounding tolerance
+
+
+class AmountExactStrategy:
+    """Match the bank amount against a single candidate of the same amount.
+
+    When several candidates match within AMOUNT_TOLERANCE, the one with the
+    largest ``age_days`` wins (FIFO tiebreaker); among equally old ones the
+    first in input order is kept.
+    """
+
+    def match(self, *, bank_amount: float, candidates: list[Candidate]) -> MatchResult:
+        """Return a confidence-1.0 result for the oldest exact match, else an empty result."""
+        oldest = None
+        for cand in candidates:
+            if not abs(cand.amount - bank_amount) < AMOUNT_TOLERANCE:
+                continue
+            # Strict '>' keeps the first candidate on ties.
+            if oldest is None or cand.age_days > oldest.age_days:
+                oldest = cand
+
+        if oldest is None:
+            return MatchResult(strategy_name='amount_exact')
+        return MatchResult(
+            picked_ids=[oldest.id],
+            confidence=1.0,
+            residual=0.0,
+            strategy_name='amount_exact',
+        )
+
+
+class FIFOStrategy:
+    """Pick oldest candidates first until the bank amount is exhausted.
+
+    The last picked candidate may overshoot the bank amount, in which case
+    ``residual`` is negative (over-allocated); when the candidates run out
+    first it stays positive (under-allocated).
+    """
+
+    def match(self, *, bank_amount: float, candidates: list[Candidate]) -> MatchResult:
+        """Allocate ``bank_amount`` across ``candidates`` in descending age order.
+
+        Returns:
+            MatchResult named 'fifo'. Confidence is 0.7 when the bank amount
+            was fully allocated, 0.5 when money is left over after all
+            candidates were used, and 0.0 when nothing was picked (no
+            candidates, or a bank amount at or below AMOUNT_TOLERANCE).
+        """
+        if not candidates:
+            return MatchResult(strategy_name='fifo')
+
+        # sorted() is stable, so equally old candidates keep their input order.
+        oldest_first = sorted(candidates, key=lambda c: c.age_days, reverse=True)
+        picked = []
+        remaining = bank_amount
+        for c in oldest_first:
+            if remaining <= AMOUNT_TOLERANCE:
+                break
+            picked.append(c.id)
+            remaining -= c.amount
+
+        if not picked:
+            # Nothing to allocate: report "no match" with zero confidence, like
+            # the other strategies, instead of the "fully allocated" score.
+            return MatchResult(residual=remaining, strategy_name='fifo')
+
+        # Same comparison as the loop's stop condition.
+        confidence = 0.7 if remaining <= AMOUNT_TOLERANCE else 0.5
+        return MatchResult(
+            picked_ids=picked,
+            confidence=confidence,
+            residual=remaining,
+            strategy_name='fifo',
+        )
+
+
+class MultiInvoiceStrategy:
+    """Find the smallest group of candidates whose amounts sum to the bank amount.
+
+    Group sizes from 2 up to ``max_combinations`` are tried in increasing
+    order; the first group within AMOUNT_TOLERANCE of the bank amount wins.
+    """
+
+    def __init__(self, max_combinations=3):
+        # Largest group size that will be tried.
+        self.max_combinations = max_combinations
+
+    def match(self, *, bank_amount: float, candidates: list[Candidate]) -> MatchResult:
+        """Return a confidence-0.85 result for the first matching group, else an empty result."""
+        for size in range(2, self.max_combinations + 1):
+            hit = next(
+                (
+                    group for group in combinations(candidates, size)
+                    if abs(sum(c.amount for c in group) - bank_amount) < AMOUNT_TOLERANCE
+                ),
+                None,
+            )
+            if hit is None:
+                continue
+            return MatchResult(
+                picked_ids=[c.id for c in hit],
+                confidence=0.85,
+                residual=0.0,
+                strategy_name=f'multi_invoice_{size}',
+            )
+        return MatchResult(strategy_name='multi_invoice')
--- a/fusion_accounting/fusion_accounting_bank_rec/services/memo_tokenizer.py
+++ b/fusion_accounting/fusion_accounting_bank_rec/services/memo_tokenizer.py
@@ -0,0 +1,44 @@
+"""Extract searchable tokens from Canadian bank statement memos.
+
+Handles common memo formats from RBC, TD, Scotia, BMO, plus generic
+cheque-number and reference-number patterns. Output is normalized
+(uppercase, alphanumeric) for case-insensitive matching.
+"""
+
+import re
+
+# (pattern, replacement) pairs applied in order by tokenize_memo(). Each one
+# collapses a keyword, an optional '#' and a number into a single token,
+# e.g. "CHQ # 123" -> "CHEQUE123", so the number stays tied to its meaning.
+REF_PATTERNS = [
+    (re.compile(r'\b(REF|REFERENCE)\s*#?\s*(\d+)\b', re.I), r'REF\2'),
+    (re.compile(r'\b(CHQ|CHEQUE|CHECK)\s*#?\s*(\d+)\b', re.I), r'CHEQUE\2'),
+    (re.compile(r'\b(INV|INVOICE)\s*#?\s*(\d+)\b', re.I), r'INV\2'),
+]
+
+# Tokens shorter than this are dropped by tokenize_memo().
+MIN_TOKEN_LENGTH = 2
+
+
+def tokenize_memo(memo: str | None) -> list[str]:
+    """Split a bank memo into normalized, de-duplicated tokens.
+
+    The memo is uppercased, reference/cheque/invoice numbers are merged with
+    their keyword via REF_PATTERNS, and the text is cut into runs of A-Z/0-9.
+    Tokens shorter than MIN_TOKEN_LENGTH are dropped. Order is preserved and
+    the first occurrence of a token wins. Empty/None input returns [].
+    """
+    if not memo:
+        return []
+
+    normalized = memo.upper()
+    for regex, canonical in REF_PATTERNS:
+        normalized = regex.sub(canonical, normalized)
+
+    words = re.findall(r'[A-Z0-9]+', normalized)
+    # dict keys keep insertion order, giving an order-preserving de-duplication.
+    return list(dict.fromkeys(w for w in words if len(w) >= MIN_TOKEN_LENGTH))
--- a/fusion_accounting/fusion_accounting_bank_rec/services/pattern_extractor.py
+++ b/fusion_accounting/fusion_accounting_bank_rec/services/pattern_extractor.py
@@ -0,0 +1,74 @@
+"""Aggregate per-partner reconciliation patterns from precedent rows.
+
+Computes typical amount range, cadence, preferred strategy, common memo
+tokens. Output is a dict suitable for create/write on fusion.reconcile.pattern.
+"""
+
+from collections import Counter
+from statistics import median
+
+
+def extract_pattern_for_partner(env, *, company_id, partner_id) -> dict:
+    """Aggregate the reconcile pattern of one (company, partner) pair.
+
+    Looks at the 200 most recently reconciled precedents and derives the
+    amount range, the average number of days between reconciliations, the
+    preferred matching strategy and the memo tokens shared by many precedents.
+
+    Returns a vals dict suitable for env['fusion.reconcile.pattern'].create().
+    Without precedents only company, partner and a zero reconcile_count are set.
+    """
+    history = env['fusion.reconcile.precedent'].sudo().search(
+        [('company_id', '=', company_id), ('partner_id', '=', partner_id)],
+        order='reconciled_at desc',
+        limit=200,
+    )
+
+    vals = {
+        'company_id': company_id,
+        'partner_id': partner_id,
+        'reconcile_count': len(history),
+    }
+    if not history:
+        return vals
+
+    amounts = sorted(history.mapped('amount'))
+    line_counts = history.mapped('matched_move_line_count')
+
+    # Strategy: mostly multi-line matches -> multi_invoice; otherwise tightly
+    # clustered amounts -> exact_amount; otherwise fifo.
+    singles = len([n for n in line_counts if n == 1])
+    multis = len([n for n in line_counts if n > 1])
+    if multis > singles:
+        pref_strategy = 'multi_invoice'
+    else:
+        pref_strategy = 'exact_amount' if _amounts_concentrated(amounts) else 'fifo'
+
+    # Cadence: mean gap in whole days between consecutive reconciliations.
+    stamps = sorted(p.reconciled_at for p in history if p.reconciled_at)
+    gaps = [(later - earlier).days for earlier, later in zip(stamps, stamps[1:])]
+    cadence = sum(gaps) / len(gaps) if gaps else 0.0
+
+    token_counter = Counter(
+        tok.strip()
+        for p in history if p.memo_tokens
+        for tok in p.memo_tokens.split(',')
+    )
+    # Keep tokens appearing in >=30% of precedents (min floor of 2 occurrences)
+    threshold = max(2, len(history) * 0.3)
+    common_tokens = ','.join(
+        tok for tok, seen in token_counter.most_common() if seen >= threshold)
+
+    vals.update({
+        'typical_amount_range': f"${min(amounts):,.2f} – ${max(amounts):,.2f} (median ${median(amounts):,.2f})",
+        'typical_cadence_days': round(cadence, 1),
+        'pref_strategy': pref_strategy,
+        'common_memo_tokens': common_tokens,
+    })
+    return vals
+
+
+def _amounts_concentrated(amounts: list[float]) -> bool:
+    """Tell whether the amounts cluster around their median.
+
+    True when at least 60% of the amounts lie within 5% of the median (the
+    median is floored at 1 for the relative comparison). Fewer than three
+    amounts always count as concentrated.
+    """
+    total = len(amounts)
+    if total < 3:
+        return True
+    centre = median(amounts)
+    scale = max(centre, 1)
+    near_centre = [a for a in amounts if abs(a - centre) / scale < 0.05]
+    return len(near_centre) / total >= 0.6
--- a/fusion_accounting/fusion_accounting_bank_rec/services/precedent_backfill.py
+++ b/fusion_accounting/fusion_accounting_bank_rec/services/precedent_backfill.py
@@ -0,0 +1,116 @@
+"""Pure-Python helpers for backfilling fusion.reconcile.precedent
+from existing account.partial.reconcile rows during migration.
+
+Strategy:
+- Each account.partial.reconcile that involves at least one
+  account.bank.statement.line's reconcile-account line is a candidate.
+- One precedent per qualifying partial. The counterpart account id is stored
+  in matched_account_ids and, together with the partner, the amount and
+  source='backfill', lets a second run detect and skip already-backfilled
+  rows (idempotency).
+"""
+
+import logging
+
+from .memo_tokenizer import tokenize_memo
+
+_logger = logging.getLogger(__name__)
+
+
+def _identify_bank_side(partial):
+    """Work out which side of a partial reconcile belongs to a bank statement line.
+
+    Returns (bank_move_line, counterpart_move_line, statement_line_id). The
+    debit side is checked first. Returns (None, None, None) when neither
+    side's move is linked to a statement line.
+    """
+    orientations = (
+        (partial.debit_move_id, partial.credit_move_id),
+        (partial.credit_move_id, partial.debit_move_id),
+    )
+    for bank_side, other_side in orientations:
+        statement_line = bank_side.move_id.statement_line_id
+        if statement_line:
+            return bank_side, other_side, statement_line.id
+    return None, None, None
+
+
+def backfill_precedents(env, *, company_id=None, batch_size=500, limit=10000):
+    """Create fusion.reconcile.precedent rows from historical bank reconciliations.
+
+    Walks the account.partial.reconcile rows that touch a bank statement line
+    (oldest first) and creates one precedent with source='backfill' for each.
+
+    Idempotent: a partial is skipped when a backfill precedent with the same
+    company, partner, amount, date, journal and counterpart account already
+    exists. Date and journal are part of the key so that recurring payments of
+    the same amount from the same partner each keep their own precedent.
+
+    Args:
+        env: Odoo env
+        company_id: restrict the walk to one company; None means all companies
+        batch_size: commit after this many created rows (never commits on a
+            test cursor); a falsy value disables the intermediate commits
+        limit: maximum number of partials examined in one call
+
+    Returns:
+        dict with `created` and `skipped` counts.
+
+    NOTE(review): partials are always read from the lowest id, so repeated
+    calls re-examine the same first `limit` rows; raise `limit` to reach
+    newer partials.
+    """
+    Precedent = env['fusion.reconcile.precedent'].sudo()
+    Partial = env['account.partial.reconcile'].sudo()
+    Line = env['account.bank.statement.line'].sudo()
+
+    in_test_mode = env.cr.__class__.__name__ == 'TestCursor'
+
+    # Pre-filter to partials that touch a bank statement line on either side.
+    # In a real DB we typically have 10x more invoice<->payment partials than
+    # bank-rec partials; filtering here keeps the loop bounded and makes the
+    # default limit reflect "real" candidates rather than every partial ever.
+    domain = [
+        '|',
+        ('debit_move_id.move_id.statement_line_id', '!=', False),
+        ('credit_move_id.move_id.statement_line_id', '!=', False),
+    ]
+    if company_id:
+        domain.append(('company_id', '=', company_id))
+    partials = Partial.search(domain, limit=limit, order='id asc')
+
+    created = 0
+    skipped = 0
+    for partial in partials:
+        _bank_line, counterpart, bsl_id = _identify_bank_side(partial)
+        if not bsl_id:
+            skipped += 1
+            continue
+
+        try:
+            statement_line = Line.browse(bsl_id)
+            currency = (partial.debit_currency_id
+                        or partial.company_id.currency_id)
+            vals = {
+                'company_id': partial.company_id.id,
+                'partner_id': (counterpart.partner_id.id
+                               if counterpart.partner_id else False),
+                'amount': abs(partial.amount),
+                'currency_id': currency.id,
+                'date': statement_line.date or partial.create_date.date(),
+                'memo_tokens': ','.join(
+                    tokenize_memo(statement_line.payment_ref or '')),
+                'journal_id': statement_line.journal_id.id,
+                'matched_move_line_count': 1,
+                'matched_account_ids': str(counterpart.account_id.id),
+                'reconciler_user_id': partial.create_uid.id,
+                'reconciled_at': partial.create_date,
+                'source': 'backfill',
+            }
+
+            # Exact match on the stored account id: a substring match would
+            # let account 12 collide with 112 or 120.
+            already_backfilled = Precedent.search([
+                ('company_id', '=', vals['company_id']),
+                ('partner_id', '=', vals['partner_id']),
+                ('amount', '=', vals['amount']),
+                ('date', '=', vals['date']),
+                ('journal_id', '=', vals['journal_id']),
+                ('matched_account_ids', '=', vals['matched_account_ids']),
+                ('source', '=', 'backfill'),
+            ], limit=1)
+            if already_backfilled:
+                skipped += 1
+                continue
+
+            # A failed INSERT aborts the surrounding PostgreSQL transaction;
+            # the savepoint confines the damage to this one row so the loop
+            # can carry on.
+            with env.cr.savepoint():
+                Precedent.create(vals)
+        except Exception as e:  # noqa: BLE001
+            _logger.warning("Backfill skip partial %s: %s", partial.id, e)
+            skipped += 1
+            continue
+
+        created += 1
+        if batch_size and created % batch_size == 0:
+            if not in_test_mode:
+                env.cr.commit()
+            _logger.info(
+                "Backfill progress: %d created, %d skipped",
+                created, skipped)
+
+    _logger.info(
+        "precedent_backfill complete: %d created, %d skipped",
+        created, skipped)
+    return {'created': created, 'skipped': skipped}
--- a/fusion_accounting/fusion_accounting_bank_rec/services/precedent_lookup.py
+++ b/fusion_accounting/fusion_accounting_bank_rec/services/precedent_lookup.py
@@ -0,0 +1,62 @@
+"""K-nearest precedent search.
+
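+Given a new bank line, find the most similar past reconciliations for
+ranking + confidence scoring. Similarity is a weighted blend of amount
+closeness (70%) and memo token overlap (30%); recency only decides which
+rows are fetched when more rows than the query limit fall inside the
+amount window.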
+"""
+
+from dataclasses import dataclass
+
+
+@dataclass
+class PrecedentMatch:
+    """A past reconciliation together with its similarity to the new bank line."""
+    precedent_id: int  # id of the fusion.reconcile.precedent row
+    amount: float  # amount stored on the precedent
+    memo_tokens: str  # comma-separated tokens stored on the precedent ('' when unset)
+    matched_move_line_count: int  # copied from the precedent row
+    similarity_score: float  # 0.7 * amount closeness + 0.3 * memo overlap
+
+
+# Relative half-width of the amount window searched by
+# find_nearest_precedents(); the window is never narrower than +/- 1.00.
+AMOUNT_TOLERANCE_PCT = 0.01  # 1% tolerance for "near" amount
+
+
+def find_nearest_precedents(env, *, partner_id, amount, k=5, memo_tokens=None):
+    """Return up to k most-similar precedents for a partner+amount.
+
+    Fetches the ``k * 4`` most recently reconciled precedents of the partner
+    whose amount lies within the tolerance window around ``amount``, scores
+    each one as ``0.7 * amount closeness + 0.3 * memo overlap`` and returns
+    the best ``k`` as PrecedentMatch objects, best first. Without
+    ``memo_tokens`` the memo part is a neutral 0.5.
+    """
+    window = max(amount * AMOUNT_TOLERANCE_PCT, 1.00)
+    recent = env['fusion.reconcile.precedent'].sudo().search(
+        [
+            ('partner_id', '=', partner_id),
+            ('amount', '>=', amount - window),
+            ('amount', '<=', amount + window),
+        ],
+        limit=k * 4,
+        order='reconciled_at desc',
+    )
+
+    def _as_match(prec):
+        # Relative amount gap, capped at 1 so the score stays within [0, 1].
+        amount_score = 1.0 - min(abs(prec.amount - amount) / max(amount, 1), 1.0)
+        if memo_tokens:
+            memo_score = _memo_overlap(prec.memo_tokens, memo_tokens)
+        else:
+            memo_score = 0.5
+        return PrecedentMatch(
+            precedent_id=prec.id,
+            amount=prec.amount,
+            memo_tokens=prec.memo_tokens or '',
+            matched_move_line_count=prec.matched_move_line_count,
+            similarity_score=(amount_score * 0.7) + (memo_score * 0.3),
+        )
+
+    matches = [_as_match(prec) for prec in recent]
+    matches.sort(key=lambda m: m.similarity_score, reverse=True)
+    return matches[:k]
+
+
+def _memo_overlap(precedent_tokens_str, new_tokens) -> float:
+    """Jaccard similarity between a stored token string and a new token collection.
+
+    Args:
+        precedent_tokens_str: comma-separated tokens as stored on a precedent
+        new_tokens: iterable of token strings from the new bank memo
+
+    Returns:
+        |intersection| / |union| in [0, 1]; 0.0 when either side has no tokens.
+
+    Tokens are stripped of surrounding whitespace and empty tokens are
+    ignored, so "A, B" and "A,B," both compare equal to ["A", "B"].
+    """
+    if not precedent_tokens_str or not new_tokens:
+        return 0.0
+    precedent_set = {tok.strip() for tok in precedent_tokens_str.split(',')} - {''}
+    new_set = {tok.strip() for tok in new_tokens} - {''}
+    union = precedent_set | new_set
+    if not union:
+        # Both sides held only separators/blank tokens.
+        return 0.0
+    return len(precedent_set & new_set) / len(union)