fix(fusion_helpdesk_central): close magic-link race + cron savepoint + avg pivot

Findings from the post-feature code review on commit 396170b4. Addresses
the two CRITICAL + one HIGH + two MEDIUM issues; rest are deferred.

CRITICAL #1 — magic-link token race:
  Two near-simultaneous POSTs on the same /engagement/<token>/approve
  could both SELECT state='pending' under READ COMMITTED, both post
  chatter, and let the last writer flip the outcome. Now the POST path
  does an atomic UPDATE helpdesk_ticket SET token=NULL WHERE token=%s
  AND state='pending' RETURNING id — the loser gets no row back and
  renders the friendly invalid-link page. Verified live: 2 concurrent
  POSTs → 1 wins, 1 loses, exactly 1 chatter row.

CRITICAL #2 — reminder cron without per-row savepoint:
  Per CLAUDE.md rule #14, a DB failure mid-loop aborts the whole
  transaction and silently kills the rest of the batch. Wrap each row's
  send_mail+write in `with self.env.cr.savepoint()`. Also corrected the
  success-count log (was len(stale), now actual sent count).

HIGH #3 — turnaround pivot summed instead of averaged:
  fields.Float defaults to SUM aggregator; meaningless for per-ticket
  decision delays. Added aggregator='avg' so the pivot reads "avg
  turnaround per ticket" not "summed wait time".

HIGH #4 — added test_concurrent_claim_only_one_wins regression test
  that fires two real HTTP POSTs against the same token and asserts
  exactly one wins + exactly one approval chatter row exists.

MEDIUM #6 — cron nextcall pinned to 09:00 tomorrow so reminders land
  in business hours regardless of when the module was last upgraded.

MEDIUM #10 — escalate failed owner-partner-create from WARNING to
  ERROR (via _logger.exception) since silent attribution to the bot
  account is a real audit-trail confusion.

Deferred (follow-up commits): #5, #7 (executor cleanup), #8, #9,
#11–#14 — none are bugs, all spec-drift or hardening.
This commit is contained in:
gsinghpal
2026-05-27 13:16:20 -04:00
parent 396170b438
commit f1a2b300f7
4 changed files with 130 additions and 22 deletions

View File

@@ -25,7 +25,7 @@ class FusionHelpdeskEngagementController(http.Controller):
# ------------------------------------------------------------------
# Token resolution — single source of truth for the GET + POST handlers.
# ------------------------------------------------------------------
def _resolve(self, token, decision):
def _resolve(self, token, decision, claim=False):
"""Return (ticket, decision_state) or (None, None) on any problem.
The "no problem" cases:
@@ -33,14 +33,41 @@ class FusionHelpdeskEngagementController(http.Controller):
- decision is one of {'approve', 'reject'}
- a single ticket matches the token AND is in state='pending'
Anything else -> (None, None), caller renders the friendly
"link no longer valid" page.
When `claim=True` (POST path), atomically clear the token via
UPDATE ... RETURNING so two near-simultaneous clicks can't both
succeed — the second SELECT-then-write would otherwise double-post
the decision chatter and let the last writer flip the outcome.
The atomic UPDATE makes single-use a property of the SQL, not of
the application logic.
When `claim=False` (GET path), we just look — no token rotation
— so re-loading the confirm page before clicking still works.
"""
if not token or not isinstance(token, str):
return (None, None)
if decision not in ('approve', 'reject'):
return (None, None)
decision_state = 'approved' if decision == 'approve' else 'rejected'
if claim:
# Atomic claim: clear the token only if it's still pending. The
# WHERE clause re-checks state so a re-engagement (which
# rotates the token) between GET and POST won't be claimed.
request.env.cr.execute(
"UPDATE helpdesk_ticket "
"SET x_fc_engagement_token = NULL "
"WHERE x_fc_engagement_token = %s "
" AND x_fc_engagement_state = 'pending' "
"RETURNING id",
(token,),
)
row = request.env.cr.fetchone()
if not row:
return (None, None) # lost the race or token rotated
ticket = request.env['helpdesk.ticket'].sudo().browse(row[0])
# Invalidate the ORM cache for the field we just changed via raw SQL.
ticket.invalidate_recordset(['x_fc_engagement_token'])
return (ticket, decision_state)
# GET path — just look, don't claim.
ticket = request.env['helpdesk.ticket'].sudo().search(
[('x_fc_engagement_token', '=', token),
('x_fc_engagement_state', '=', 'pending')],
@@ -81,9 +108,11 @@ class FusionHelpdeskEngagementController(http.Controller):
type='http', auth='public', methods=['POST'], csrf=False, sitemap=False,
)
def engagement_submit(self, token, decision, **post):
ticket, decision_state = self._resolve(token, decision)
# claim=True does an atomic UPDATE that clears the token only if
# the row is still pending — wins the race against a second click.
ticket, decision_state = self._resolve(token, decision, claim=True)
if not ticket:
# Could be a second click on the same link, or a token rotated
# Could be a second click on the same link, a token rotated
# by a re-engagement, or a typo. Same friendly page for all.
return request.render(
'fusion_helpdesk_central.engagement_invalid', {},
@@ -139,9 +168,14 @@ class FusionHelpdeskEngagementController(http.Controller):
'email': email,
})
except Exception:
_logger.warning(
# ERROR not WARNING: a failed owner partner-create means the
# approval chatter line will say "Approved by support@nexasystems.ca"
# (the bot) instead of the actual owner — a real audit-trail
# confusion that someone needs to look at.
_logger.exception(
'fusion_helpdesk_central: could not create owner partner '
'for %s on ticket %s; chatter will be attributed to the '
'service account.', email, ticket.id,
'for %s on ticket %s; chatter author will fall back to the '
'service account. Body text still names the owner correctly '
'via format_engagement_chatter.', email, ticket.id,
)
return None

View File

@@ -23,6 +23,15 @@
<field name="user_id" ref="base.user_root"/>
<field name="interval_number">1</field>
<field name="interval_type">days</field>
<!--
Pin the first run to 09:00 tomorrow so subsequent daily
fires land in business hours. Without this, Odoo defaults
nextcall to install-time → reminders go out at 2am if you
happened to deploy at 2am, which is wrong (owners shouldn't
get pinged in the middle of the night).
-->
<field name="nextcall"
eval="(DateTime.now().replace(hour=9, minute=0, second=0, microsecond=0) + timedelta(days=1)).strftime('%Y-%m-%d %H:%M:%S')"/>
<field name="active" eval="True"/>
</record>

View File

@@ -86,8 +86,11 @@ class HelpdeskTicket(models.Model):
string='Owner Turnaround (h)',
compute='_compute_engagement_turnaround',
store=True, copy=False, digits=(8, 2),
aggregator='avg', # Pivot default is SUM for Float — meaningless here
help='Hours between engagement-sent and owner decision. Stored so '
'the Owner Engagements pivot can aggregate without recomputing.',
'the Owner Engagements pivot can aggregate without recomputing. '
'Aggregated as average across rows so the pivot reads "avg '
'turnaround per ticket", not "summed wait-time".',
)
# message_post-friendly index for the reminder cron + token resolution.
@@ -292,28 +295,41 @@ class HelpdeskTicket(models.Model):
)
return 0
now = fields.Datetime.now()
sent = 0
for ticket in stale:
# Per-row savepoint: a DB failure on one ticket (constraint hit,
# mail-server hiccup that propagates as an OperationalError, etc.)
# would otherwise leave the whole cron transaction in an aborted
# state — every subsequent row's `reminded_at` write would fail
# silently with InFailedSqlTransaction. CLAUDE.md rule #14: use
# `cr.savepoint()` not `cr.commit()` inside the loop (commits
# raise inside TransactionCase).
try:
template.with_context(
fhc_is_reminder=True,
fhc_personal_note='',
).send_mail(ticket.id, force_send=False)
ticket.x_fc_engagement_reminded_at = now
except Exception: # noqa: BLE001 — reminder must never break cron loop
with self.env.cr.savepoint():
template.with_context(
fhc_is_reminder=True,
fhc_personal_note='',
).send_mail(ticket.id, force_send=False)
ticket.x_fc_engagement_reminded_at = now
sent += 1
except Exception: # noqa: BLE001 — never break the batch
_logger.exception(
'fusion_helpdesk_central: reminder send failed for '
'ticket %s; will retry next run.', ticket.id,
)
_logger.info(
'fusion_helpdesk_central: reminder cron sent %s reminder(s).',
len(stale),
'fusion_helpdesk_central: reminder cron sent %s reminder(s) '
'out of %s candidate(s).', sent, len(stale),
)
return len(stale)
return sent
def _fc_finalize_engagement(self, decision, owner_partner, comment=None):
"""Apply the owner's decision: post chatter (public), clear token,
write state + decided_at. Called from the public portal controller
after a magic link is clicked + confirmed.
"""Apply the owner's decision: post chatter (public), write state +
decided_at. Called from the public portal controller AFTER the
controller has already atomically claimed (cleared) the token via
UPDATE...RETURNING — so we don't clear it again here; doing so
would race with a re-engagement that happened to rotate the token
between our write and the controller's claim.
Chatter is posted as a public comment (subtype mail.mt_comment) so
it propagates to the employee's My Tickets thread per the
@@ -336,7 +352,6 @@ class HelpdeskTicket(models.Model):
self.write({
'x_fc_engagement_state': decision,
'x_fc_engagement_decided_at': fields.Datetime.now(),
'x_fc_engagement_token': False,
})
# ------------------------------------------------------------------

View File

@@ -391,6 +391,56 @@ class TestEngagementPortal(HttpCase):
t.unlink()
self.env.cr.commit()
def test_concurrent_claim_only_one_wins(self):
"""Regression for the magic-link double-click race.
Two POSTs against the same token must NOT both record decisions.
The controller uses UPDATE...RETURNING with a WHERE on
state='pending' so the second call gets a NULL row back and
returns the invalid-link page. Without that atomic claim, two
worker transactions could each SELECT the same pending row and
both post chatter — last-writer-wins on state.
url_open hits live HTTP, so each call is its own request/
transaction — different from a same-transaction simulation and
the actual production race scenario.
"""
t = self._make_pending_ticket()
token = t.x_fc_engagement_token
try:
r1 = self.url_open(
'/fusion_helpdesk/engagement/%s/approve' % token,
data={'comment': 'first'}, timeout=10,
)
r2 = self.url_open(
'/fusion_helpdesk/engagement/%s/approve' % token,
data={'comment': 'second'}, timeout=10,
)
self.assertEqual(r1.status_code, 200)
self.assertEqual(r2.status_code, 200)
ok_count = sum(
'Approval recorded' in r.text for r in (r1, r2))
invalid_count = sum(
'Link no longer valid' in r.text for r in (r1, r2))
self.assertEqual(
ok_count, 1,
'Both clicks must not both succeed (race condition).',
)
self.assertEqual(invalid_count, 1)
t.invalidate_recordset()
approval_chatter = self.env['mail.message'].search_count([
('res_id', '=', t.id),
('model', '=', 'helpdesk.ticket'),
('body', 'ilike', 'Approved by'),
])
self.assertEqual(
approval_chatter, 1,
'Race must not produce duplicate approval chatter posts.',
)
finally:
t.unlink()
self.env.cr.commit()
def test_post_records_decision_and_invalidates_token(self):
t = self._make_pending_ticket()
token = t.x_fc_engagement_token