test(fusion_accounting_bank_rec): local LLM (LM Studio/Ollama) compat smoke

Tagged 'local_llm'. Auto-detects LM Studio (:1234) or Ollama (:11434) via host.docker.internal or localhost. When one is running, configures the provider params and runs engine.suggest_matches end-to-end. Skips gracefully when no local LLM is present (CI / dev VM mode).

Made-with: Cursor
test(fusion_accounting_bank_rec): performance benchmarks with P95 targets
2026-04-19 14:01:58 -04:00 · 2026-04-19 14:00:15 -04:00
3 changed files with 292 additions and 0 deletions
--- a/fusion_accounting_bank_rec/tests/__init__.py
+++ b/fusion_accounting_bank_rec/tests/__init__.py
@@ -21,3 +21,5 @@ from . import test_bulk_reconcile_wizard
 from . import test_migration_round_trip
 from . import test_coexistence
 from . import test_bank_rec_tours
+from . import test_performance_benchmarks
+from . import test_local_llm_compat
--- a/fusion_accounting_bank_rec/tests/test_local_llm_compat.py
+++ b/fusion_accounting_bank_rec/tests/test_local_llm_compat.py
@@ -0,0 +1,102 @@
+"""Local LLM compatibility test (LM Studio, Ollama, etc.).
+
+Skips if no local OpenAI-compatible LLM server is reachable. When one is
+running (LM Studio at :1234, Ollama at :11434), runs an end-to-end:
+
+1. Configure ``ir.config_parameter`` to point at the local server.
+2. Trigger ``engine.suggest_matches`` with the 'openai' provider.
+3. Assert the call did not crash and produced at least one suggestion.
+
+The smoke is intentionally lenient: local models often emit malformed
+JSON, in which case ``confidence_scoring`` falls back to statistical-only
+ranking. We assert end-to-end happiness, not AI re-rank quality.
+"""
+
+import socket
+
+from odoo.tests.common import TransactionCase, tagged
+
+from . import _factories as f
+
+
+def _server_reachable(host, port, timeout=1.0):
+    """Report whether a TCP connection to ``host:port`` can be opened.
+
+    :param host: hostname or IP address to probe.
+    :param port: TCP port to probe.
+    :param timeout: seconds handed to ``socket.create_connection``.
+    :return: ``True`` when the connection was established (it is closed
+        again straight away), ``False`` when the attempt raised ``OSError``.
+    """
+    try:
+        probe = socket.create_connection((host, port), timeout=timeout)
+    except OSError:
+        # ``socket.timeout`` is an ``OSError`` subclass, so refused
+        # connections, resolution failures and timeouts all land here.
+        return False
+    probe.close()
+    return True
+
+
+def _detect_local_llm():
+    """Locate a running local LLM server.
+
+    LM Studio (:1234) and Ollama (:11434) are probed on
+    ``host.docker.internal`` first and on ``localhost`` second; the first
+    endpoint that accepts a TCP connection wins.
+
+    :return: ``(base_url, model_name)`` for the first reachable server, or
+        ``(None, None)`` when none of the four endpoints answers.
+    """
+    # (port, default model name) for each supported server.
+    servers = (
+        (1234, 'local-model'),     # LM Studio
+        (11434, 'llama3.1:8b'),    # Ollama
+    )
+    # host.docker.internal lets a container reach the host; localhost
+    # covers a non-containerised run.
+    for host in ('host.docker.internal', 'localhost'):
+        for port, default_model in servers:
+            if not _server_reachable(host, port, timeout=0.5):
+                continue
+            return (f'http://{host}:{port}/v1', default_model)
+    return (None, None)
+
+
+@tagged('post_install', '-at_install', 'local_llm')
+class TestLocalLLMCompat(TransactionCase):
+    """End-to-end smoke test of ``suggest_matches`` against a local LLM.
+
+    Each test is skipped when ``_detect_local_llm`` finds no server.
+    """
+
+    def setUp(self):
+        """Probe for a local server; skip the test when none answers."""
+        super().setUp()
+        detected_url, detected_model = _detect_local_llm()
+        self.base_url = detected_url
+        self.model = detected_model
+        if detected_url:
+            return
+        self.skipTest(
+            "No local LLM server detected "
+            "(LM Studio :1234 / Ollama :11434)")
+
+    def test_suggest_matches_with_local_llm(self):
+        """Point the 'openai' provider at the local server and run the engine.
+
+        Passes when ``suggest_matches`` returns an entry for the bank line
+        and at least one ``fusion.reconcile.suggestion`` row exists for it.
+        """
+        config = self.env['ir.config_parameter'].sudo()
+        overrides = {
+            'fusion_accounting.openai_base_url': self.base_url,
+            'fusion_accounting.openai_model': self.model,
+            # Local servers ignore the key but the adapter requires *some*
+            # value.
+            'fusion_accounting.openai_api_key': 'lm-studio',
+            'fusion_accounting.provider.bank_rec_suggest': 'openai',
+        }
+        # Snapshot the current values first so they can be put back below.
+        previous = {key: config.get_param(key) for key in overrides}
+        for key, new_value in overrides.items():
+            config.set_param(key, new_value)
+
+        try:
+            partner = self.env['res.partner'].create(
+                {'name': 'Local LLM Partner'})
+            f.make_invoice(self.env, partner=partner, amount=750)
+            bank_line = f.make_bank_line(
+                self.env, amount=750, partner=partner,
+                memo='REF 12345 Local LLM test')
+
+            engine = self.env['fusion.reconcile.engine']
+            result = engine.suggest_matches(bank_line, limit_per_line=3)
+            self.assertIn(bank_line.id, result)
+
+            suggestion_model = self.env['fusion.reconcile.suggestion']
+            suggestions = suggestion_model.search([
+                ('statement_line_id', '=', bank_line.id),
+            ])
+            self.assertGreater(
+                len(suggestions), 0,
+                "Local LLM run should still produce at least one suggestion "
+                "(statistical fallback if AI re-rank fails)")
+        finally:
+            # Restore whatever was configured before the test ran.
+            for key, old_value in previous.items():
+                if old_value is None:
+                    continue
+                config.set_param(key, old_value)
--- a/fusion_accounting_bank_rec/tests/test_performance_benchmarks.py
+++ b/fusion_accounting_bank_rec/tests/test_performance_benchmarks.py
@@ -0,0 +1,188 @@
+"""Performance benchmarks with P95 targets.
+
+Tagged with ``benchmark`` so they can be selected explicitly:
+    odoo --test-tags 'benchmark' ...
+
+These tests measure wall-clock time and assert P95 stays within plan
+budgets. They run a small N (e.g. 10 iterations) so total test time
+stays under 30s. For real load testing, use a separate harness.
+
+Hard-fail thresholds are 5x the plan budget — they catch egregious
+regressions without flaking on cold-start variance in CI.
+"""
+
+import json
+import statistics
+import time
+
+from odoo.tests.common import HttpCase, TransactionCase, new_test_user, tagged
+
+from . import _factories as f
+
+
+def _percentile(samples, p):
+    """Return the ``p``-th percentile of ``samples`` (0-100).
+
+    The result never leaves the observed range: ``p == 0`` yields the
+    minimum, ``p == 100`` the maximum, and everything in between is
+    interpolated with the ``inclusive`` quantile method. The default
+    ``exclusive`` method extrapolates past the largest sample when N is
+    small, which would inflate a P95 computed from only ~10 timings.
+
+    :param samples: sequence of numbers; it does not need to be sorted.
+    :param p: integer percentile in the closed range 0-100.
+    :return: the percentile value, ``None`` for an empty sequence, or the
+        sole element when there is exactly one sample.
+    :raises ValueError: if ``p`` lies outside 0-100.
+    """
+    if not 0 <= p <= 100:
+        raise ValueError(f"percentile must be within 0-100, got {p!r}")
+    if not samples:
+        return None
+    if len(samples) == 1:
+        return samples[0]
+    # ``quantiles(n=100)`` returns only the 99 interior cut points, so the
+    # two endpoints have to be answered directly.
+    if p == 0:
+        return min(samples)
+    if p == 100:
+        return max(samples)
+    cut_points = statistics.quantiles(samples, n=100, method='inclusive')
+    return cut_points[p - 1]
+
+
+@tagged('post_install', '-at_install', 'benchmark')
+class TestEngineBenchmarks(TransactionCase):
+    """Wall-clock benchmarks for ``fusion.reconcile.engine``."""
+
+    def setUp(self):
+        """Create the partner, journal, statement and invoices the tests share."""
+        super().setUp()
+        self.partner = self.env['res.partner'].create({'name': 'Bench Partner'})
+        # A dedicated journal + statement is built once and reused by every
+        # iteration; without it the second make_bank_line() would collide
+        # on the (code, company) unique constraint of the default 'TEST'
+        # journal.
+        self.journal = f.make_bank_journal(
+            self.env, name='Engine Bench Bank', code='EBB')
+        self.statement = f.make_bank_statement(
+            self.env, journal=self.journal, name='Engine Bench Stmt')
+        # Invoices give suggest_matches something to score against.
+        self.invoices = [
+            f.make_invoice(self.env, partner=self.partner, amount=amount)
+            for amount in (100, 200, 300, 400, 500)
+        ]
+
+    def test_suggest_matches_p95_under_500ms(self):
+        """Time 10 ``suggest_matches`` calls; fail if P95 reaches 2500ms."""
+        samples_ms = []
+        for _iteration in range(10):
+            bank_line = f.make_bank_line(
+                self.env, journal=self.journal, statement=self.statement,
+                amount=300, partner=self.partner)
+            # Only the engine call is timed, not the fixture creation.
+            t0 = time.perf_counter()
+            self.env['fusion.reconcile.engine'].suggest_matches(
+                bank_line, limit_per_line=3)
+            samples_ms.append((time.perf_counter() - t0) * 1000)
+        samples_ms.sort()
+        p95 = _percentile(samples_ms, 95)
+        median = statistics.median(samples_ms)
+        summary = f"suggest_matches: median={median:.1f}ms p95={p95:.1f}ms"
+        print(f"\n  PERF: {summary} (target <500ms)")
+        # The 500ms target is only reported. The hard failure sits at 5x
+        # the budget so cold-start variance does not flake the run while
+        # egregious regressions are still caught.
+        self.assertLess(
+            p95, 2500,
+            f"suggest_matches P95 way over budget: {summary} "
+            f"(target <500ms, hard fail >2500ms)")
+
+    def test_reconcile_batch_p95_under_5s(self):
+        """Time one ``reconcile_batch`` over 50 lines; fail at 25000ms."""
+        # All 50 pairs share one journal/statement so the (code, company)
+        # constraint is not hit.
+        journal = f.make_bank_journal(
+            self.env, name='Batch Bench Bank', code='BBB')
+        statement = f.make_bank_statement(
+            self.env, journal=journal, name='Batch Bench Stmt')
+        line_ids = []
+        for offset in range(50):
+            amount = 100 + offset
+            # The invoice is created only as a match candidate for the
+            # bank line of the same amount; its record is not needed here.
+            f.make_invoice(self.env, partner=self.partner, amount=amount)
+            bank_line = f.make_bank_line(
+                self.env, journal=journal, statement=statement,
+                amount=amount, partner=self.partner)
+            line_ids.append(bank_line.id)
+        lines = self.env['account.bank.statement.line'].browse(line_ids)
+        t0 = time.perf_counter()
+        result = self.env['fusion.reconcile.engine'].reconcile_batch(
+            lines, strategy='auto')
+        elapsed = (time.perf_counter() - t0) * 1000
+        reconciled = result.get('reconciled_count', 'n/a')
+        summary = (f"reconcile_batch(50 lines): {elapsed:.0f}ms, "
+                   f"reconciled={reconciled}")
+        print(f"\n  PERF: {summary} (target <5000ms)")
+        self.assertLess(
+            elapsed, 25000,
+            f"reconcile_batch way over budget: {summary} "
+            f"(target <5000ms, hard fail >25000ms)")
+
+
+@tagged('post_install', '-at_install', 'benchmark')
+class TestControllerBenchmarks(HttpCase):
+    """Wall-clock benchmark for the ``list_unreconciled`` JSON endpoint."""
+
+    USER_LOGIN = 'bench_ctrl_user'
+    USER_PASSWORD = 'bench_ctrl_user'
+
+    def setUp(self):
+        """Create the dedicated user the benchmark authenticates as."""
+        super().setUp()
+        # Mirrors test_controller.py auth setup -- a fresh test user with
+        # the same group bundle the controller expects. The dev DB's admin
+        # password is non-default, so we cannot rely on 'admin'/'admin'.
+        new_test_user(
+            self.env,
+            login=self.USER_LOGIN,
+            password=self.USER_PASSWORD,
+            groups=(
+                'base.group_user,'
+                'account.group_account_user,'
+                'fusion_accounting_core.group_fusion_accounting_admin'
+            ),
+        )
+
+    def test_list_unreconciled_p95_under_200ms(self):
+        """Time 10 requests over 50 bank lines; fail if P95 reaches 1000ms.
+
+        Every response must be HTTP 200 *and* free of a JSON-RPC ``error``
+        member, so a failing endpoint cannot pass as a fast one.
+        """
+        partner = self.env['res.partner'].create({'name': 'Ctrl Bench'})
+        journal = f.make_bank_journal(
+            self.env, name='Ctrl Bench Bank', code='CBB')
+        statement = f.make_bank_statement(
+            self.env, journal=journal, name='Ctrl Bench Stmt')
+        for i in range(50):
+            f.make_bank_line(
+                self.env, journal=journal, statement=statement,
+                amount=100 + i, partner=partner,
+                memo=f'Ctrl bench line {i}')
+        self.authenticate(self.USER_LOGIN, self.USER_PASSWORD)
+        body = json.dumps({
+            'jsonrpc': '2.0',
+            'method': 'call',
+            'params': {
+                'journal_id': journal.id,
+                'limit': 50,
+                'offset': 0,
+                'company_id': self.env.company.id,
+            },
+            'id': 1,
+        })
+        timings = []
+        for _ in range(10):
+            start = time.perf_counter()
+            response = self.url_open(
+                '/fusion/bank_rec/list_unreconciled',
+                data=body,
+                headers={'Content-Type': 'application/json'},
+            )
+            elapsed = (time.perf_counter() - start) * 1000
+            # Validation happens after the clock has stopped so it does not
+            # skew the sample.
+            self.assertEqual(response.status_code, 200)
+            # A JSON-RPC endpoint reports failures inside the payload while
+            # still answering HTTP 200, so the status code alone does not
+            # prove the call succeeded.
+            self.assertNotIn(
+                'error', response.json(),
+                "list_unreconciled returned a JSON-RPC error; the timing "
+                "would measure the failure path")
+            timings.append(elapsed)
+        timings.sort()
+        p95 = _percentile(timings, 95)
+        median = statistics.median(timings)
+        msg = f"list_unreconciled: median={median:.1f}ms p95={p95:.1f}ms"
+        print(f"\n  PERF: {msg} (target <200ms)")
+        # Hard failure sits at 5x the 200ms target.
+        self.assertLess(
+            p95, 1000,
+            f"list_unreconciled P95 way over budget: {msg} "
+            f"(target <200ms, hard fail >1000ms)")
+
+
+@tagged('post_install', '-at_install', 'benchmark')
+class TestMVBenchmarks(TransactionCase):
+    """Wall-clock benchmark for the unreconciled-bank-line MV refresh."""
+
+    def test_mv_refresh_under_2s(self):
+        """Time one non-concurrent refresh; fail at 10000ms."""
+        # concurrently=False is used because, per the original author, it
+        # works even before the MV has been seeded into a state that is
+        # eligible for a concurrent refresh.
+        t0 = time.perf_counter()
+        self.env['fusion.unreconciled.bank.line.mv']._refresh(
+            concurrently=False)
+        elapsed = (time.perf_counter() - t0) * 1000
+        summary = (
+            f"MV refresh: {elapsed:.0f}ms "
+            f"(current row count varies with DB state)"
+        )
+        print(f"\n  PERF: {summary} (target <2000ms)")
+        # The 2s target is only reported; the hard ceiling is 10s.
+        self.assertLess(
+            elapsed, 10000,
+            f"MV refresh way over budget: {summary} "
+            f"(target <2000ms, hard fail >10000ms)")