Compare commits

...

2 Commits

Author SHA1 Message Date
gsinghpal
14e59148c6 test(fusion_accounting_bank_rec): local LLM (LM Studio/Ollama) compat smoke
Tagged 'local_llm'. Auto-detects LM Studio (:1234) or Ollama (:11434)
via host.docker.internal or localhost. When running, configures the
provider params and runs engine.suggest_matches end-to-end. Skips
gracefully when no local LLM is present (CI / dev VM mode).

Made-with: Cursor
2026-04-19 14:01:58 -04:00
gsinghpal
55eb368195 test(fusion_accounting_bank_rec): performance benchmarks with P95 targets
Tagged 'benchmark' so they can be selected explicitly. Targets:
suggest_matches <500ms, reconcile_batch(50) <5s, list_unreconciled <200ms,
MV refresh <2s. Hard-fail at 5x budget to catch egregious regressions.

Measured on local dev VM:
- suggest_matches: median=221ms p95=234ms (target <500ms)
- reconcile_batch(50 lines): 3318ms (target <5000ms)
- list_unreconciled: median=14ms p95=77ms (target <200ms)
- MV refresh: 60ms (target <2000ms)

Made-with: Cursor
2026-04-19 14:00:15 -04:00
3 changed files with 292 additions and 0 deletions

View File

@@ -21,3 +21,5 @@ from . import test_bulk_reconcile_wizard
from . import test_migration_round_trip
from . import test_coexistence
from . import test_bank_rec_tours
from . import test_performance_benchmarks
from . import test_local_llm_compat

View File

@@ -0,0 +1,102 @@
"""Local LLM compatibility test (LM Studio, Ollama, etc.).
Skips if no local OpenAI-compatible LLM server is reachable. When one is
running (LM Studio at :1234, Ollama at :11434), runs an end-to-end:
1. Configure ``ir.config_parameter`` to point at the local server.
2. Trigger ``engine.suggest_matches`` with the 'openai' provider.
3. Assert the call did not crash and produced at least one suggestion.
The smoke is intentionally lenient: local models often emit malformed
JSON, in which case ``confidence_scoring`` falls back to statistical-only
ranking. We assert end-to-end happiness, not AI re-rank quality.
"""
import socket
from odoo.tests.common import TransactionCase, tagged
from . import _factories as f
def _server_reachable(host, port, timeout=1.0):
    """Report whether a TCP connection to ``host:port`` can be opened.

    The probe connection is closed as soon as it is established; no data
    is sent. Any ``OSError`` raised while connecting (or while closing the
    probe) is reported as "not reachable" rather than propagated.

    :param host: hostname or IP address to probe
    :param port: TCP port to probe
    :param timeout: value handed to ``socket.create_connection`` as its
        ``timeout`` argument (seconds)
    :return: ``True`` if the connection succeeded, ``False`` otherwise
    """
    address = (host, port)
    try:
        with socket.create_connection(address, timeout=timeout):
            # Connecting is the whole check; there is nothing to exchange.
            pass
    except (OSError, socket.timeout):
        return False
    return True
def _detect_local_llm():
    """Locate a local LLM server and return ``(base_url, model_name)``.

    Probes LM Studio (:1234) and Ollama (:11434), first on
    ``host.docker.internal`` (so a container can reach the host) and then
    on ``localhost`` (so a non-containerised run finds the same servers).
    The first port that accepts a TCP connection wins.

    :return: ``(base_url, default_model)`` for the first reachable
        candidate, or ``(None, None)`` when none of them is reachable
    """
    # (port, default model name) for each supported server flavour.
    servers = (
        (1234, 'local-model'),    # LM Studio
        (11434, 'llama3.1:8b'),   # Ollama
    )
    for host in ('host.docker.internal', 'localhost'):
        for port, default_model in servers:
            if not _server_reachable(host, port, timeout=0.5):
                continue
            return (f'http://{host}:{port}/v1', default_model)
    return (None, None)
@tagged('post_install', '-at_install', 'local_llm')
class TestLocalLLMCompat(TransactionCase):
    """End-to-end smoke test of ``suggest_matches`` against a local LLM.

    Every test in this class is skipped when ``_detect_local_llm`` finds
    no server.
    """

    def setUp(self):
        """Detect the local server, or skip the test if there is none."""
        super().setUp()
        detected_url, detected_model = _detect_local_llm()
        self.base_url = detected_url
        self.model = detected_model
        if self.base_url:
            return
        self.skipTest(
            "No local LLM server detected "
            "(LM Studio :1234 / Ollama :11434)")

    def test_suggest_matches_with_local_llm(self):
        """Point the 'openai' provider at the local server and run the engine.

        Asserts only that the engine returns an entry for the bank line and
        that at least one suggestion record exists for it afterwards.
        """
        params = self.env['ir.config_parameter'].sudo()
        overrides = {
            'fusion_accounting.openai_base_url': self.base_url,
            'fusion_accounting.openai_model': self.model,
            # Local servers ignore the key but the adapter requires *some*
            # value.
            'fusion_accounting.openai_api_key': 'lm-studio',
            'fusion_accounting.provider.bank_rec_suggest': 'openai',
        }
        # Snapshot every current value before overwriting any of them.
        prior = {key: params.get_param(key) for key in overrides}
        for key, value in overrides.items():
            params.set_param(key, value)
        try:
            partner = self.env['res.partner'].create(
                {'name': 'Local LLM Partner'})
            f.make_invoice(self.env, partner=partner, amount=750)
            bank_line = f.make_bank_line(
                self.env, amount=750, partner=partner,
                memo='REF 12345 Local LLM test')
            engine = self.env['fusion.reconcile.engine']
            result = engine.suggest_matches(bank_line, limit_per_line=3)
            self.assertIn(bank_line.id, result)
            suggestion_domain = [('statement_line_id', '=', bank_line.id)]
            suggestions = self.env['fusion.reconcile.suggestion'].search(
                suggestion_domain)
            self.assertGreater(
                len(suggestions), 0,
                "Local LLM run should still produce at least one suggestion "
                "(statistical fallback if AI re-rank fails)")
        finally:
            # Put back whatever was read before the overrides; values that
            # read back as None are left untouched.
            for key, value in prior.items():
                if value is None:
                    continue
                params.set_param(key, value)

View File

@@ -0,0 +1,188 @@
"""Performance benchmarks with P95 targets.
Tagged with ``benchmark`` so they can be selected explicitly:
odoo --test-tags 'benchmark' ...
These tests measure wall-clock time. Repeated operations report median and
P95 over a small N (e.g. 10 iterations); one-shot operations report a single
timing. N is kept small so total test time stays under 30s. For real load
testing, use a separate harness.
Plan budgets are printed for reference; the assertions only hard-fail at 5x
the plan budget — they catch egregious regressions without flaking on
cold-start variance in CI.
"""
import json
import statistics
import time
from odoo.tests.common import HttpCase, TransactionCase, new_test_user, tagged
from . import _factories as f
def _percentile(samples, p):
    """Return the ``p``-th percentile of ``samples`` (0-100).

    ``statistics.quantiles(..., n=100)`` yields 99 cut points (percentiles
    1..99), so the two endpoints are handled explicitly: ``p == 0`` is the
    smallest sample and ``p == 100`` the largest. Indexing the cut points
    directly would silently return the 99th percentile for ``p == 0``
    (index ``-1``) and raise ``IndexError`` for ``p == 100``.

    NOTE(review): interior percentiles use the default (exclusive) method
    of ``statistics.quantiles``, which can extrapolate past the largest
    sample when the sample count is small -- confirm that is the intended
    P95 definition for 10-sample benchmarks.

    :param samples: sequence of numeric samples; need not be sorted
    :param p: integer percentile in the closed range 0-100
    :return: the percentile value, ``None`` for empty ``samples``, or the
        only element when there is a single sample
    :raises ValueError: if ``p`` is outside 0-100
    """
    if not 0 <= p <= 100:
        raise ValueError(f"percentile must be within 0-100, got {p!r}")
    if not samples:
        return None
    if len(samples) == 1:
        return samples[0]
    if p == 0:
        return min(samples)
    if p == 100:
        return max(samples)
    # Cut point i (1-based) lives at index i - 1.
    return statistics.quantiles(samples, n=100)[p - 1]
@tagged('post_install', '-at_install', 'benchmark')
class TestEngineBenchmarks(TransactionCase):
    """Wall-clock benchmarks for ``fusion.reconcile.engine`` entry points."""

    def setUp(self):
        """Create the partner, journal, statement and invoices used below."""
        super().setUp()
        env = self.env
        self.partner = env['res.partner'].create({'name': 'Bench Partner'})
        # A dedicated journal+statement is created once and reused across
        # all iterations -- otherwise the second make_bank_line() collides
        # on the (code, company) unique constraint of the default 'TEST'
        # journal.
        self.journal = f.make_bank_journal(
            env, name='Engine Bench Bank', code='EBB')
        self.statement = f.make_bank_statement(
            env, journal=self.journal, name='Engine Bench Stmt')
        # A handful of invoices so suggest_matches has something to score.
        self.invoices = [
            f.make_invoice(env, partner=self.partner, amount=amount)
            for amount in (100, 200, 300, 400, 500)
        ]

    def test_suggest_matches_p95_under_500ms(self):
        """Time ten ``suggest_matches`` calls; hard-fail if P95 >= 2500ms."""
        samples_ms = []
        for _ in range(10):
            # Line creation stays outside the timed region.
            line = f.make_bank_line(
                self.env, journal=self.journal, statement=self.statement,
                amount=300, partner=self.partner)
            began = time.perf_counter()
            self.env['fusion.reconcile.engine'].suggest_matches(
                line, limit_per_line=3)
            samples_ms.append((time.perf_counter() - began) * 1000)  # ms
        ordered = sorted(samples_ms)
        p95 = _percentile(ordered, 95)
        median = statistics.median(ordered)
        summary = f"suggest_matches: median={median:.1f}ms p95={p95:.1f}ms"
        print(f"\n PERF: {summary} (target <500ms)")
        # The 500ms plan budget is only reported. The assertion sits at 5x
        # the budget so cold-start variance does not fail the run while
        # egregious regressions still do.
        self.assertLess(
            p95, 2500,
            f"suggest_matches P95 way over budget: {summary} "
            f"(target <500ms, hard fail >2500ms)")

    def test_reconcile_batch_p95_under_5s(self):
        """Time one ``reconcile_batch`` over 50 lines; hard-fail at 25000ms."""
        # All 50 pairs share one journal/statement so we don't blow the
        # (code, company) constraint.
        journal = f.make_bank_journal(
            self.env, name='Batch Bench Bank', code='BBB')
        statement = f.make_bank_statement(
            self.env, journal=journal, name='Batch Bench Stmt')
        line_ids = []
        for offset in range(50):
            amount = 100 + offset
            # The invoice is created for its side effect only; the bank
            # line of the same amount is what gets reconciled against it.
            f.make_invoice(self.env, partner=self.partner, amount=amount)
            bank_line = f.make_bank_line(
                self.env, journal=journal, statement=statement,
                amount=amount, partner=self.partner)
            line_ids.append(bank_line.id)
        lines = self.env['account.bank.statement.line'].browse(line_ids)
        began = time.perf_counter()
        result = self.env['fusion.reconcile.engine'].reconcile_batch(
            lines, strategy='auto')
        elapsed = (time.perf_counter() - began) * 1000
        reconciled = result.get('reconciled_count', 'n/a')
        summary = (f"reconcile_batch(50 lines): {elapsed:.0f}ms, "
                   f"reconciled={reconciled}")
        print(f"\n PERF: {summary} (target <5000ms)")
        self.assertLess(
            elapsed, 25000,
            f"reconcile_batch way over budget: {summary} "
            f"(target <5000ms, hard fail >25000ms)")
@tagged('post_install', '-at_install', 'benchmark')
class TestControllerBenchmarks(HttpCase):
    """Wall-clock benchmark for the ``list_unreconciled`` HTTP endpoint."""

    USER_LOGIN = 'bench_ctrl_user'
    USER_PASSWORD = 'bench_ctrl_user'

    def setUp(self):
        """Create the dedicated user the benchmark authenticates as."""
        super().setUp()
        # Mirrors test_controller.py auth setup -- a fresh test user with
        # the same group bundle the controller expects. The dev DB's admin
        # password is non-default, so we cannot rely on 'admin'/'admin'.
        group_xmlids = ','.join((
            'base.group_user',
            'account.group_account_user',
            'fusion_accounting_core.group_fusion_accounting_admin',
        ))
        new_test_user(
            self.env,
            login=self.USER_LOGIN,
            password=self.USER_PASSWORD,
            groups=group_xmlids,
        )

    def test_list_unreconciled_p95_under_200ms(self):
        """Time ten endpoint calls over 50 lines; hard-fail if P95 >= 1000ms."""
        partner = self.env['res.partner'].create({'name': 'Ctrl Bench'})
        journal = f.make_bank_journal(
            self.env, name='Ctrl Bench Bank', code='CBB')
        statement = f.make_bank_statement(
            self.env, journal=journal, name='Ctrl Bench Stmt')
        for index in range(50):
            f.make_bank_line(
                self.env, journal=journal, statement=statement,
                amount=100 + index, partner=partner,
                memo=f'Ctrl bench line {index}')
        self.authenticate(self.USER_LOGIN, self.USER_PASSWORD)
        # JSON-RPC envelope; serialised once and reused for every request.
        call_params = {
            'journal_id': journal.id,
            'limit': 50,
            'offset': 0,
            'company_id': self.env.company.id,
        }
        body = json.dumps({
            'jsonrpc': '2.0',
            'method': 'call',
            'params': call_params,
            'id': 1,
        })
        samples_ms = []
        for _ in range(10):
            began = time.perf_counter()
            response = self.url_open(
                '/fusion/bank_rec/list_unreconciled',
                data=body,
                headers={'Content-Type': 'application/json'},
            )
            elapsed = (time.perf_counter() - began) * 1000
            # Checked after the clock stops so the assertion is not timed.
            self.assertEqual(response.status_code, 200)
            samples_ms.append(elapsed)
        ordered = sorted(samples_ms)
        p95 = _percentile(ordered, 95)
        median = statistics.median(ordered)
        summary = f"list_unreconciled: median={median:.1f}ms p95={p95:.1f}ms"
        print(f"\n PERF: {summary} (target <200ms)")
        self.assertLess(
            p95, 1000,
            f"list_unreconciled P95 way over budget: {summary} "
            f"(target <200ms, hard fail >1000ms)")
@tagged('post_install', '-at_install', 'benchmark')
class TestMVBenchmarks(TransactionCase):
    """Wall-clock benchmark for refreshing the unreconciled-lines MV."""

    def test_mv_refresh_under_2s(self):
        """Time one non-concurrent refresh; hard-fail at 10000ms."""
        mv_model_name = 'fusion.unreconciled.bank.line.mv'
        began = time.perf_counter()
        # Non-concurrent refresh works even before the MV has been seeded
        # with a concurrent-refresh-eligible state.
        self.env[mv_model_name]._refresh(concurrently=False)
        elapsed = (time.perf_counter() - began) * 1000
        summary = (f"MV refresh: {elapsed:.0f}ms "
                   "(current row count varies with DB state)")
        print(f"\n PERF: {summary} (target <2000ms)")
        # Hard ceiling: 10s, i.e. 5x the 2000ms target.
        self.assertLess(
            elapsed, 10000,
            f"MV refresh way over budget: {summary} "
            f"(target <2000ms, hard fail >10000ms)")