"""Bank reconciliation AI re-rank prompt. Used by fusion_accounting_bank_rec/services/confidence_scoring.py to ask an LLM to refine the statistical ranking of candidate matches. Output contract: the LLM MUST respond with valid JSON of shape: {"ranked": [{"candidate_id": int, "confidence": float, "reason": str}, ...]} System prompt is provider-agnostic - works with OpenAI Chat Completions, Claude Messages, and local OpenAI-compatible servers (LM Studio, Ollama). """ from datetime import date SYSTEM_PROMPT = """You are an expert accountant assisting with bank reconciliation. Your job: given a bank statement line and a list of candidate journal items that statistically scored well as potential matches, re-rank them based on domain expertise. Consider: 1. **Amount-exact matches** are almost always correct unless the partner is wrong. 2. **Memo / reference clues** - bank memos often contain invoice numbers, partner names, or transaction references that disambiguate matches. 3. **Date proximity** - invoices are typically reconciled within 30 days of issue. 4. **Pattern conformance** - if the partner has a learned pattern (e.g. "always pays exact amount, weekly cadence"), favor candidates that fit that pattern. 5. **Precedent similarity** - if a near-identical reconcile happened before, it's likely the right one. Return ONLY valid JSON of this exact shape: { "ranked": [ {"candidate_id": , "confidence": , "reason": ""}, ... ] } Do NOT include any prose before or after the JSON. Do NOT use markdown code fences. The "ranked" array MUST contain every candidate_id from the input, in your preferred order (highest confidence first). """ def build_prompt(statement_line, scored_candidates, pattern=None, precedents=None): """Build (system_prompt, user_prompt) for AI re-rank. Args: statement_line: account.bank.statement.line recordset (singleton) scored_candidates: list of ScoredCandidate dataclasses (from confidence_scoring) pattern: fusion.reconcile.pattern recordset for the partner, or None precedents: list of PrecedentMatch dataclasses, or None Returns: (system_prompt: str, user_prompt: str) tuple """ user_parts = [] user_parts.append("BANK LINE:") user_parts.append(f" Date: {statement_line.date}") user_parts.append( f" Amount: {statement_line.amount} {statement_line.currency_id.name or ''}" ) user_parts.append( f" Memo / payment ref: {statement_line.payment_ref or '(none)'}" ) if statement_line.partner_id: user_parts.append(f" Partner: {statement_line.partner_id.name}") if pattern: user_parts.append("") user_parts.append("PARTNER PATTERN (learned from past reconciles):") user_parts.append(f" Reconcile count: {pattern.reconcile_count}") user_parts.append(f" Preferred strategy: {pattern.pref_strategy}") user_parts.append( f" Typical cadence: ~{pattern.typical_cadence_days} days between reconciles" ) if pattern.typical_amount_range: user_parts.append(f" Typical amount range: {pattern.typical_amount_range}") if pattern.common_memo_tokens: user_parts.append(f" Common memo tokens: {pattern.common_memo_tokens}") if precedents: user_parts.append("") user_parts.append("RECENT PRECEDENTS (most-similar past reconciles for this partner):") # Cap at 3 precedents to keep prompt small and reduce token cost. for p in precedents[:3]: user_parts.append( f" - amount={p.amount}, similarity={p.similarity_score:.2f}, " f"matched {p.matched_move_line_count} line(s), tokens={p.memo_tokens}" ) user_parts.append("") user_parts.append("CANDIDATES (scored by statistical pipeline):") for s in scored_candidates: user_parts.append( f" - candidate_id={s.candidate_id}, statistical_confidence={s.confidence}, " f"amount_match={s.score_amount_match}, pattern_fit={s.score_partner_pattern}, " f"precedent_sim={s.score_precedent_similarity}, " f"reason=\"{s.reasoning}\"" ) user_parts.append("") user_parts.append("Re-rank these candidates and return JSON per the system prompt.") user_prompt = "\n".join(user_parts) return (SYSTEM_PROMPT, user_prompt)