Provider-agnostic system + user prompt builder for the confidence scoring pipeline's Pass 3 (AI re-rank). Output contract is JSON with "ranked" array; works with OpenAI, Claude, and local OpenAI-compatible servers (LM Studio, Ollama). Made-with: Cursor
108 lines
4.4 KiB
Python
108 lines
4.4 KiB
Python
"""Bank reconciliation AI re-rank prompt.

Used by fusion_accounting_bank_rec/services/confidence_scoring.py to ask
an LLM to refine the statistical ranking of candidate matches.

Output contract: the LLM MUST respond with valid JSON of shape:
    {"ranked": [{"candidate_id": int, "confidence": float, "reason": str}, ...]}

System prompt is provider-agnostic - works with OpenAI Chat Completions,
Claude Messages, and local OpenAI-compatible servers (LM Studio, Ollama).
"""
|
|
|
|
from datetime import date
|
|
|
|
|
|
# Provider-agnostic system prompt for the AI re-rank pass. It tells the model
# which signals to weigh and instructs it to reply with only a JSON object
# holding a "ranked" array (no prose, no markdown fences).
SYSTEM_PROMPT = """You are an expert accountant assisting with bank reconciliation.

Your job: given a bank statement line and a list of candidate journal items
that statistically scored well as potential matches, re-rank them based on
domain expertise. Consider:

1. **Amount-exact matches** are almost always correct unless the partner is wrong.
2. **Memo / reference clues** - bank memos often contain invoice numbers, partner
   names, or transaction references that disambiguate matches.
3. **Date proximity** - invoices are typically reconciled within 30 days of issue.
4. **Pattern conformance** - if the partner has a learned pattern (e.g. "always
   pays exact amount, weekly cadence"), favor candidates that fit that pattern.
5. **Precedent similarity** - if a near-identical reconcile happened before,
   it's likely the right one.

Return ONLY valid JSON of this exact shape:
{
  "ranked": [
    {"candidate_id": <int>, "confidence": <float 0-1>, "reason": "<short string>"},
    ...
  ]
}

Do NOT include any prose before or after the JSON. Do NOT use markdown code fences.
The "ranked" array MUST contain every candidate_id from the input, in your
preferred order (highest confidence first).
"""
|
|
|
|
|
|
def _one_line(value):
    """Return ``str(value)`` with every run of whitespace collapsed to one space."""
    return " ".join(str(value).split())


def build_prompt(statement_line, scored_candidates, pattern=None, precedents=None,
                 *, max_precedents=3):
    """Build (system_prompt, user_prompt) for AI re-rank.

    The user prompt is a line-oriented text with a BANK LINE section, an
    optional PARTNER PATTERN section, an optional RECENT PRECEDENTS section
    and a CANDIDATES section, joined with newlines. Free-text values (memo,
    partner name, candidate reasoning) are flattened to a single line so an
    embedded newline cannot forge extra lines or sections in that layout.

    Args:
        statement_line: account.bank.statement.line recordset (singleton)
        scored_candidates: list of ScoredCandidate dataclasses (from confidence_scoring)
        pattern: fusion.reconcile.pattern recordset for the partner, or None
        precedents: list of PrecedentMatch dataclasses, or None
        max_precedents: maximum number of precedents listed (default 3).
            ``None`` lists all of them; a value <= 0 omits the section.

    Returns:
        (system_prompt: str, user_prompt: str) tuple
    """
    currency = statement_line.currency_id.name or ""
    # A missing or whitespace-only memo is reported as "(none)".
    memo = _one_line(statement_line.payment_ref or "") or "(none)"

    user_parts = [
        "BANK LINE:",
        f" Date: {statement_line.date}",
        # rstrip() drops the dangling space left when the currency has no name.
        f" Amount: {statement_line.amount} {currency}".rstrip(),
        f" Memo / payment ref: {memo}",
    ]
    if statement_line.partner_id:
        user_parts.append(f" Partner: {_one_line(statement_line.partner_id.name)}")

    if pattern:
        user_parts.extend([
            "",
            "PARTNER PATTERN (learned from past reconciles):",
            f" Reconcile count: {pattern.reconcile_count}",
            f" Preferred strategy: {pattern.pref_strategy}",
            f" Typical cadence: ~{pattern.typical_cadence_days} days between reconciles",
        ])
        if pattern.typical_amount_range:
            user_parts.append(f" Typical amount range: {pattern.typical_amount_range}")
        if pattern.common_memo_tokens:
            user_parts.append(f" Common memo tokens: {pattern.common_memo_tokens}")

    # Cap the precedents to keep the prompt small and reduce token cost.
    limit = None if max_precedents is None else max(max_precedents, 0)
    shown_precedents = precedents[:limit] if precedents else []
    if shown_precedents:
        user_parts.append("")
        user_parts.append("RECENT PRECEDENTS (most-similar past reconciles for this partner):")
        user_parts.extend(
            f" - amount={p.amount}, similarity={p.similarity_score:.2f}, "
            f"matched {p.matched_move_line_count} line(s), tokens={p.memo_tokens}"
            for p in shown_precedents
        )

    user_parts.append("")
    user_parts.append("CANDIDATES (scored by statistical pipeline):")
    # One candidate per line; the reasoning is flattened to preserve that.
    user_parts.extend(
        f" - candidate_id={s.candidate_id}, statistical_confidence={s.confidence}, "
        f"amount_match={s.score_amount_match}, pattern_fit={s.score_partner_pattern}, "
        f"precedent_sim={s.score_precedent_similarity}, "
        f'reason="{_one_line(s.reasoning)}"'
        for s in scored_candidates
    )

    user_parts.append("")
    user_parts.append("Re-rank these candidates and return JSON per the system prompt.")

    return (SYSTEM_PROMPT, "\n".join(user_parts))
|