This commit is contained in:
gsinghpal
2026-05-21 03:37:25 -04:00
parent b2f483d67c
commit 1314f4581d
47 changed files with 5730 additions and 177 deletions

View File

@@ -21,13 +21,26 @@ Issue button on the cert form, which stays as the fallback path.
import base64
import io
import logging
import os
import re
import shutil
import subprocess
import tempfile
from markupsafe import Markup
from odoo import _, api, fields, models
from odoo.exceptions import UserError
_logger = logging.getLogger(__name__)
# Minimum pixel-area for an extracted RTF image to be treated as the
# "microscope photo" candidate. Filters out narrow header banners
# (~790x203 = 160k pixels) while keeping standard XDAL exports
# (~1024x768 = 786k). See CLAUDE.md "entech apt is broken" for the
# libwmf install path that makes this possible.
_FP_RTF_IMAGE_MIN_AREA = 200_000
# Fischerscope XDAL 600 reading line, e.g.
# n= 1 NiP 1= 0.6885 mils Ni 1 = 91.323 % P 1 = 8.6771 %
@@ -38,10 +51,206 @@ _FISCHER_READING_RE = re.compile(
r'\s+P\s+\d+\s*=\s*([\d.]+)\s*%',
re.IGNORECASE,
)
_FISCHER_CALIB_RE = re.compile(r'Calibr\.\s*Std\.\s*Set\s+(.+)', re.IGNORECASE)
# Capture every {\pict ... \wmetafile8 ...hex...} group in an RTF, in
# document order. The hex blob can be interspersed with whitespace
# (RTF wraps to 80 cols) — the consumer strips it.
#
# Pattern, piece by piece:
#   1. the literal group opener `{\pict`;
#   2. a lazy run of control words (backslash, letters, optional signed
#      number, optional single whitespace char) ahead of the marker;
#   3. the literal `\wmetafile8`;
#   4. a greedy run of any further control words after the marker;
#   5. group 1: hex digits and whitespace, lazily, up to the first `}`.
#
# No piece of the pattern can consume a `{`, so a \pict group that
# carries a nested {...} group before its closing brace does not match.
# NOTE(review): re.DOTALL only affects `.`, which this pattern does not
# use — the flag looks redundant; confirm before removing.
_RTF_PICT_WMF_RE = re.compile(
r'\{\\pict'
r'(?:\\[a-zA-Z]+-?\d*\s?)*?'
r'\\wmetafile8'
r'(?:\\[a-zA-Z]+-?\d*\s?)*'
r'\s*([0-9a-fA-F\s]+?)'
r'\}',
re.DOTALL,
)
def _fp_side_png_names(filenames, index):
    """Return the wmf2svg side-file names for picture ``index``.

    wmf2svg writes ``pict<index>-N.png`` next to the SVG. The names are
    returned ordered by the numeric value of ``N`` so that ``-10`` comes
    after ``-9`` (a plain string sort would put it before ``-2``).
    Matching names whose ``N`` part is not a plain number are kept and
    sorted by name after the numbered ones.

    :param filenames: iterable of bare file names (e.g. ``os.listdir``)
    :param index: zero-based picture index used in the ``pict%d`` stem
    :return: list of matching file names in raster order
    """
    prefix = 'pict%d-' % index
    suffix = '.png'

    def _order(name):
        stem = name[len(prefix):-len(suffix)]
        if stem.isdecimal():
            return (0, int(stem), name)
        return (1, 0, name)

    return sorted(
        (
            name for name in filenames
            if name.startswith(prefix) and name.endswith(suffix)
        ),
        key=_order,
    )


def _fp_extract_rtf_images(raw_bytes):
    """Pull all WMF picture blocks out of an RTF, unpack to PNG via
    libwmf, and return the list of PNG bytes in document order.

    XDAL 600 RTF exports embed each picture as a WMF metafile wrapping
    the actual raster. ImageMagick on Debian Bookworm doesn't carry a
    WMF delegate, so we shell out to `wmf2svg` (from libwmf-bin) — it
    writes a thin SVG and a side-file `*-N.png` per raster block. We
    keep the PNGs, drop the SVG/WMF temp files.

    :param raw_bytes: the raw RTF file content (bytes); falsy → ``[]``
    :return: list of PNG ``bytes`` objects; ``[]`` (never an exception)
        when nothing is embedded, when ``wmf2svg`` is missing, not
        executable or times out, or when the temp files can't be
        written/read. The cert issue keeps working even when image
        extraction can't run.
    """
    if not raw_bytes:
        return []
    try:
        text = raw_bytes.decode('latin-1', errors='replace')
    except (AttributeError, UnicodeError):
        # Not a bytes-like object with .decode(); nothing to extract.
        return []

    blobs = []
    for m in _RTF_PICT_WMF_RE.finditer(text):
        hex_blob = re.sub(r'\s+', '', m.group(1))
        try:
            blob = bytes.fromhex(hex_blob)
        except ValueError:
            # Odd-length / corrupt hex run — skip just this picture.
            continue
        if blob:
            blobs.append(blob)
    if not blobs:
        return []

    pngs = []
    tmpdir = None
    try:
        tmpdir = tempfile.mkdtemp(prefix='fp_rtf_wmf_')
        for i, wmf in enumerate(blobs):
            wmf_path = os.path.join(tmpdir, 'pict%d.wmf' % i)
            svg_path = os.path.join(tmpdir, 'pict%d.svg' % i)
            with open(wmf_path, 'wb') as fh:
                fh.write(wmf)
            try:
                # check=False: a non-zero exit simply yields no PNGs
                # for this picture; the loop below finds nothing.
                subprocess.run(
                    ['wmf2svg', '-o', svg_path, wmf_path],
                    capture_output=True, timeout=20, check=False,
                )
            except (OSError, subprocess.TimeoutExpired) as e:
                # OSError covers a missing binary (FileNotFoundError)
                # as well as a non-executable one (PermissionError).
                _logger.warning(
                    'wmf2svg unavailable or timed out (%s) — skipping '
                    'RTF image extraction.', e,
                )
                return []
            # wmf2svg writes <basename>-N.png next to the SVG.
            for fn in _fp_side_png_names(os.listdir(tmpdir), i):
                with open(os.path.join(tmpdir, fn), 'rb') as fh:
                    pngs.append(fh.read())
    except OSError as e:
        # Temp dir / temp file trouble (disk full, permissions, ...).
        _logger.warning(
            'RTF image extraction failed on temp files (%s) — skipping.',
            e,
        )
        return []
    finally:
        if tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)
    return pngs
def _fp_png_header_size(png):
    """Read ``(width, height)`` from a PNG's IHDR chunk without Pillow.

    Layout used: 8-byte PNG signature, 4-byte chunk length, the chunk
    type ``IHDR``, then big-endian 32-bit width and height.

    :return: ``(width, height)`` or ``None`` when ``png`` is not
        bytes-like or does not start with a PNG signature + IHDR chunk.
    """
    if (
        not isinstance(png, (bytes, bytearray))
        or len(png) < 24
        or png[:8] != b'\x89PNG\r\n\x1a\n'
        or png[12:16] != b'IHDR'
    ):
        return None
    return (
        int.from_bytes(png[16:20], 'big'),
        int.from_bytes(png[20:24], 'big'),
    )


def _fp_pick_microscope_image(png_bytes_list, min_area=None):
    """Pick the largest-area PNG (by pixel count, not file size) from
    the list — that's almost always the microscope photo. Header
    banners are wide-but-thin so their pixel area falls below the
    threshold.

    Dimensions are read with Pillow when it is importable. When it is
    not, they are read from the PNG header directly, so the area
    threshold is still enforced instead of blindly returning the first
    image.

    :param png_bytes_list: iterable of PNG ``bytes``; ``None``/empty is
        treated as "no candidates"
    :param min_area: minimum ``width * height`` for a candidate;
        defaults to ``_FP_RTF_IMAGE_MIN_AREA``
    :return: ``(png_bytes, width, height)`` of the largest qualifying
        image, or ``(None, 0, 0)`` when no PNG meets the threshold
    """
    if min_area is None:
        min_area = _FP_RTF_IMAGE_MIN_AREA
    try:
        from PIL import Image
    except ImportError:
        # Pillow ships with Odoo; this is defensive.
        Image = None

    best = None
    best_area = 0
    for png in png_bytes_list or ():
        if Image is not None:
            try:
                with Image.open(io.BytesIO(png)) as im:
                    width, height = im.width, im.height
            except Exception:
                # Best-effort: an unreadable candidate is skipped, it
                # must never abort the cert issue.
                continue
        else:
            size = _fp_png_header_size(png)
            if size is None:
                continue
            width, height = size
        area = width * height
        if area > best_area and area >= min_area:
            best = (png, width, height)
            best_area = area
    return best or (None, 0, 0)
# Header-field patterns. The value-capturing ones stop at a run of two
# or more whitespace characters or at the end of the line. They are
# compiled with re.MULTILINE so that `$` really means end-of-LINE:
# without it `$` only matches at the very end of the text, and a value
# terminated by a single newline with more text after it would not
# match at all.
_FISCHER_CALIB_RE = re.compile(
    r'Calibr\.\s*Std\.\s*Set\s+(.+?)(?:\s{2,}|$)',
    re.IGNORECASE | re.MULTILINE,
)
_FISCHER_OPERATOR_RE = re.compile(r'Operator:\s*(\S+)', re.IGNORECASE)
_FISCHER_DATE_RE = re.compile(r'Date:\s*([\d/]+)', re.IGNORECASE)
_FISCHER_TIME_RE = re.compile(r'Time:\s*([\d:]+\s*[APMapm]*)')
# XDAL 600 header lines — only present on full RTF reports (not on
# the .docx body the upstream parser already handled).
_FISCHER_PRODUCT_RE = re.compile(
    r'Product:\s*([^\r\n]+?)(?:\s{2,}|$)',
    re.IGNORECASE | re.MULTILINE,
)
_FISCHER_DIRECTORY_RE = re.compile(
    r'Directory:\s*([^\r\n]+?)(?:\s{2,}|$)',
    re.IGNORECASE | re.MULTILINE,
)
_FISCHER_APPLICATION_RE = re.compile(
    r'Application:\s*([^\r\n]+?)(?:\s{2,}|$)',
    re.IGNORECASE | re.MULTILINE,
)
_FISCHER_MTIME_RE = re.compile(r'Measuring\s+time\s+(\d+)\s*sec', re.IGNORECASE)
_FISCHER_EQUIPMENT_RE = re.compile(r'(Fischerscope[^\r\n]*XDAL\s*\d+)', re.IGNORECASE)
def _fp_strip_rtf(raw_bytes):
    """Reduce an RTF byte string to approximate plain text.

    Runs a fixed sequence of regex passes that blank out single-level
    destination/font/colour/picture groups, control words, hex escapes
    and escaped punctuation, then turns leftover braces into spaces and
    collapses runs of spaces/tabs. Newlines that survive the passes are
    kept. Every removed construct is replaced by one space, never by
    nothing, so neighbouring words don't fuse.

    This is deliberately not a full RTF parser: groups containing a
    nested brace are not removed as a unit (only their control words
    and braces are). It targets XRF/XDAL reports with a simple body
    around an embedded WMF image.

    :param raw_bytes: raw RTF content; falsy input yields ``''``
    :return: the stripped text as ``str``
    """
    if not raw_bytes:
        return ''

    # Order matters: whole groups go first (while their braces are
    # still intact), then control words, then the escape forms.
    group_and_escape_passes = (
        # {\* ...} destination groups without nested braces
        r'\{\\\*[^{}]*\}',
        # font table and colour table groups without nested braces
        r'\{\\fonttbl[^{}]*\}',
        r'\{\\colortbl[^{}]*\}',
        # {\pict ...} groups: hex image data, single-level only
        r'\{\\pict[^{}]*\}',
        # control words: backslash + letters + optional signed number
        # + at most one trailing whitespace character
        r'\\[A-Za-z]+-?\d*\s?',
        # \'hh hex escapes for special characters
        r"\\'[0-9a-fA-F]{2}",
        # any other backslash escape (`\\`, `\{`, `\}`, ...)
        r'\\[^A-Za-z\s]',
    )

    # latin-1 maps every byte to a code point, so decoding can't fail.
    stripped = raw_bytes.decode('latin-1', errors='replace')
    for pattern in group_and_escape_passes:
        stripped = re.sub(pattern, ' ', stripped)

    # Whatever braces are left are plain group delimiters.
    stripped = stripped.translate(str.maketrans('{}', '  '))

    # Squash space/tab runs left behind by the passes above so the
    # Fischerscope regexes see regular spacing.
    return re.sub(r'[ \t]+', ' ', stripped)
def _fp_parse_fischerscope_rtf(raw_bytes):
    """Parse a Fischerscope XDAL 600 RTF export into a result dict.

    The RTF is flattened with ``_fp_strip_rtf`` and the module-level
    ``_FISCHER_*`` patterns are run over the resulting text. Callers
    detect RTF by its magic bytes (`{\\rtf`) — the XRF software names
    the file `.doc` for legacy reasons, but the contents are RTF.

    :param raw_bytes: raw file content; falsy input yields the
        all-empty result
    :return: dict with keys ``readings`` (list of 3-float tuples taken
        from groups 2-4 of each reading match), ``calibration``,
        ``operator``, ``date_str``, ``time_str``, ``product``,
        ``directory``, ``application``, ``equipment`` (stripped first
        match or ``''``), ``measuring_time_sec`` (int, 0 when absent)
        and ``raw_text`` (the stripped text)
    """
    result = {
        'readings': [], 'calibration': '', 'operator': '',
        'date_str': '', 'time_str': '',
        'product': '', 'directory': '', 'application': '',
        'measuring_time_sec': 0, 'equipment': '',
        'raw_text': '',
    }
    if not raw_bytes:
        return result

    text = _fp_strip_rtf(raw_bytes)

    def _first(pattern):
        # First capture of `pattern` in the text, trimmed; '' if absent.
        found = pattern.search(text)
        if not found:
            return ''
        return found.group(1).strip()

    for hit in _FISCHER_READING_RE.finditer(text):
        try:
            triple = tuple(float(hit.group(n)) for n in (2, 3, 4))
        except ValueError:
            # A capture like "1.2.3" passes the regex but isn't a float.
            continue
        result['readings'].append(triple)

    seconds_hit = _FISCHER_MTIME_RE.search(text)
    try:
        seconds = int(seconds_hit.group(1)) if seconds_hit else 0
    except ValueError:
        seconds = 0

    result.update(
        calibration=_first(_FISCHER_CALIB_RE),
        operator=_first(_FISCHER_OPERATOR_RE),
        date_str=_first(_FISCHER_DATE_RE),
        time_str=_first(_FISCHER_TIME_RE),
        product=_first(_FISCHER_PRODUCT_RE),
        directory=_first(_FISCHER_DIRECTORY_RE),
        application=_first(_FISCHER_APPLICATION_RE),
        measuring_time_sec=seconds,
        equipment=_first(_FISCHER_EQUIPMENT_RE),
        raw_text=text,
    )
    return result
def _fp_parse_fischerscope_docx(raw_bytes):
@@ -227,6 +436,14 @@ class FpCertIssueWizardLine(models.TransientModel):
)
fischer_file = fields.Binary(string='Fischerscope File (PDF or .docx)')
fischer_filename = fields.Char(string='Filename')
# Optional: microscope/coupon image exported separately from the
# XDAL 600. The RTF carries an embedded WMF that the entech host
# can't rasterize (no imagemagick/libwmf — see CLAUDE.md "entech
# apt is in a broken-deps state"), so the operator exports a PNG
# from the XDAL software and uploads it here. Rendered inline on
# the CoC's thickness section when present.
fischer_image_file = fields.Binary(string='Measurement Image (PNG/JPEG)')
fischer_image_filename = fields.Char(string='Image Filename')
parsed_summary = fields.Text(
string='Parsed Summary', readonly=True,
help='Output of the .docx parser. Populated when you attach a '
@@ -274,22 +491,29 @@ class FpCertIssueWizardLine(models.TransientModel):
@api.onchange('fischer_file', 'fischer_filename')
def _onchange_fischer_file(self):
"""Try to parse .docx on upload; prefill the readings + summary."""
"""Parse .docx OR RTF on upload (XDAL 600 names RTF files
`.doc` — detected by magic bytes; see CLAUDE.md "Fischerscope
XDAL 600 `.doc` files are actually RTF"). Prefill the readings
+ summary so the operator can verify before issuing."""
if not self.fischer_file:
return
name = (self.fischer_filename or '').lower()
if not name.endswith('.docx'):
self.parsed_summary = _(
'Non-.docx upload (%s) — file will be attached as '
'evidence. Type readings manually below if needed.'
) % (self.fischer_filename or 'unnamed')
return
try:
raw = base64.b64decode(self.fischer_file)
except Exception:
self.parsed_summary = _('Could not decode the uploaded file.')
return
parsed = _fp_parse_fischerscope_docx(raw)
name = (self.fischer_filename or '').lower()
is_rtf = raw[:5] == b'{\\rtf' or name.endswith('.rtf')
if is_rtf:
parsed = _fp_parse_fischerscope_rtf(raw)
elif name.endswith('.docx'):
parsed = _fp_parse_fischerscope_docx(raw)
else:
self.parsed_summary = _(
'Non-parseable upload (%s) — file will be attached as '
'evidence. Type readings manually below if needed.'
) % (self.fischer_filename or 'unnamed')
return
readings = parsed.get('readings') or []
if readings:
self.reading_line_ids = [(5, 0, 0)] + [
@@ -312,15 +536,70 @@ class FpCertIssueWizardLine(models.TransientModel):
't': parsed.get('time_str') or '',
}
def _write_thickness_metadata_to_cert(self, cert, parsed):
    """Copy the parsed Fischerscope header block onto the cert.

    Writes operator, product, directory, application, measuring time,
    equipment and the source filename, plus a combined date/time, so
    the CoC report can render a full report block instead of a bare
    readings table. A value is only written when it is truthy and the
    target field exists on ``cert``; ``cert.write`` is called once, and
    only if there is something to write.

    :param cert: record exposing ``_fields`` and ``write(vals)``
    :param parsed: dict as returned by the Fischerscope parsers
    """
    candidates = {
        'x_fc_thickness_operator': parsed.get('operator'),
        'x_fc_thickness_product': parsed.get('product'),
        'x_fc_thickness_directory': parsed.get('directory'),
        'x_fc_thickness_application': parsed.get('application'),
        'x_fc_thickness_measuring_time_sec':
            parsed.get('measuring_time_sec') or 0,
        # Equipment falls back to the gauge model when the parser
        # found no equipment line.
        'x_fc_thickness_equipment':
            parsed.get('equipment') or 'Fischerscope XDAL 600',
        'x_fc_thickness_source_filename': self.fischer_filename or '',
    }
    vals = {
        fname: fval
        for fname, fval in candidates.items()
        if fname in cert._fields and fval
    }

    # The gauge reports date and time separately; glue them together
    # and try the known layouts (12h/24h, with/without seconds, date
    # only). Best-effort: when nothing matches, the field is simply
    # left out rather than failing the cert issue.
    stamp_field = 'x_fc_thickness_datetime'
    date_part = (parsed.get('date_str') or '').strip()
    time_part = (parsed.get('time_str') or '').strip()
    if date_part and stamp_field in cert._fields:
        from datetime import datetime
        stamp_text = ' '.join((date_part, time_part)).strip()
        known_layouts = (
            '%m/%d/%Y %I:%M:%S %p', '%m/%d/%Y %I:%M %p',
            '%m/%d/%Y %H:%M:%S', '%m/%d/%Y %H:%M',
            '%m/%d/%Y',
        )
        for layout in known_layouts:
            try:
                stamp = datetime.strptime(stamp_text, layout)
            except ValueError:
                continue
            vals[stamp_field] = stamp
            break

    if vals:
        cert.write(vals)
def _apply_to_cert(self):
"""Write this line's data into the cert."""
"""Write this line's data into the cert.
Order matters: operator-uploaded PNG must run LAST so it wins
over any image the RTF auto-extraction picked. Reverse order
(PNG first, then RTF) lets the WMF blow away the explicit
operator choice — exactly the bug we just hit.
"""
self.ensure_one()
cert = self.cert_id.sudo()
if not self.fischer_file:
# Just push manual readings, if any.
self._push_readings_to_cert()
# PNG-only path: still attach the operator's image upload.
self._apply_image_to_cert(cert)
return
name = (self.fischer_filename or 'fischerscope').lower()
calibration = '' # backfilled below if the parser hits
if name.endswith('.pdf'):
# Drop the PDF into the cert-local field — merges into page 2.
cert.write({
@@ -328,23 +607,107 @@ class FpCertIssueWizardLine(models.TransientModel):
'x_fc_local_thickness_pdf_filename': self.fischer_filename,
})
else:
# .doc / .docx / anything else — attach as evidence.
self.env['ir.attachment'].sudo().create({
# .doc / .docx / anything else — attach as evidence AND
# link the attachment to the cert's evidence slot so the
# thickness-required gate recognises it. Without the link,
# the gate would still raise (it checks specific fields,
# not stray attachments) and rolling back the transaction
# would orphan the upload.
att = self.env['ir.attachment'].sudo().create({
'name': self.fischer_filename or 'fischerscope-report',
'type': 'binary',
'datas': self.fischer_file,
'res_model': 'fp.certificate',
'res_id': cert.id,
})
cert.message_post(body=_(
if 'x_fc_local_thickness_evidence_id' in cert._fields:
cert.write({'x_fc_local_thickness_evidence_id': att.id})
# Re-parse the file at apply time so the report-header
# metadata (operator, product, application, etc.) makes it
# onto the cert. Onchange populates reading_line_ids but
# not the cert-level fields. Best-effort: any parse hiccup
# is logged and we still complete the attachment + readings.
try:
raw = base64.b64decode(self.fischer_file)
is_rtf = raw[:5] == b'{\\rtf'
if is_rtf:
parsed = _fp_parse_fischerscope_rtf(raw)
elif name.endswith('.docx'):
parsed = _fp_parse_fischerscope_docx(raw)
else:
parsed = None
if parsed:
self._write_thickness_metadata_to_cert(cert, parsed)
calibration = parsed.get('calibration') or ''
# WMF image extraction is RTF-only (the .docx path
# uses python-docx which already gives PIL-readable
# bitmaps; that flow can be added later if needed).
if is_rtf and 'x_fc_thickness_image_id' in cert._fields:
pngs = _fp_extract_rtf_images(raw)
img_bytes, img_w, img_h = _fp_pick_microscope_image(pngs)
if img_bytes:
img_att = self.env['ir.attachment'].sudo().create({
'name': '%s-microscope.png' % (
(self.fischer_filename or 'fischerscope')
.rsplit('.', 1)[0]
),
'type': 'binary',
'datas': base64.b64encode(img_bytes),
'mimetype': 'image/png',
'res_model': 'fp.certificate',
'res_id': cert.id,
})
cert.write({
'x_fc_thickness_image_id': img_att.id,
})
_logger.info(
'Cert %s: attached microscope image '
'(%dx%d, %d bytes)',
cert.name, img_w, img_h, len(img_bytes),
)
except Exception as exc:
_logger.warning(
'Cert %s: Fischerscope metadata extraction failed: %s',
cert.name, exc,
)
cert.message_post(body=Markup(_(
'Fischerscope file <b>%s</b> attached via Issue wizard.'
) % (self.fischer_filename or 'unnamed'))
self._push_readings_to_cert()
)) % (self.fischer_filename or 'unnamed'))
self._push_readings_to_cert(calibration=calibration)
# Operator's PNG upload wins over auto-extracted WMF — runs
# last so it overwrites x_fc_thickness_image_id if both paths
# supplied an image.
self._apply_image_to_cert(cert)
def _push_readings_to_cert(self):
def _apply_image_to_cert(self, cert):
    """Attach the operator-uploaded image and point the cert at it.

    Creates an ``ir.attachment`` (via sudo) bound to the
    ``fp.certificate`` record from ``fischer_image_file`` and stores
    its id in ``x_fc_thickness_image_id``, so the file exists as a
    regular attachment AND is reachable through the dedicated field.
    Does nothing when no image was uploaded or when the cert model has
    no ``x_fc_thickness_image_id`` field.

    :param cert: the certificate record to attach to
    """
    self.ensure_one()
    image_field = 'x_fc_thickness_image_id'
    if not self.fischer_image_file:
        return
    if image_field not in cert._fields:
        return

    Attachment = self.env['ir.attachment'].sudo()
    image_att = Attachment.create({
        # Fallback name for uploads that arrived without a filename.
        'name': self.fischer_image_filename or 'thickness-image.png',
        'type': 'binary',
        'datas': self.fischer_image_file,
        'res_model': 'fp.certificate',
        'res_id': cert.id,
    })
    cert.write({image_field: image_att.id})
def _push_readings_to_cert(self, calibration=''):
"""Create fp.thickness.reading rows on the cert from wizard rows.
Skips when no rows. Does not deduplicate against existing
readings — the manager has just told us this is the new data."""
readings — the manager has just told us this is the new data.
Per-reading calibration_std_ref is stamped from the optional
`calibration` arg so the printed CoC's calibration line stays
accurate even when readings are re-pushed from a fresh upload.
"""
self.ensure_one()
Reading = self.env.get('fp.thickness.reading')
if Reading is None or not self.reading_line_ids:
@@ -358,6 +721,8 @@ class FpCertIssueWizardLine(models.TransientModel):
}
if 'reading_number' in Reading._fields:
vals['reading_number'] = r.sequence
if calibration and 'calibration_std_ref' in Reading._fields:
vals['calibration_std_ref'] = calibration
Reading.sudo().create(vals)

View File

@@ -93,6 +93,23 @@
<field name="fischer_filename"
invisible="1"/>
</group>
<group string="Measurement Image (Optional)"
invisible="not needs_thickness">
<field name="fischer_image_file"
filename="fischer_image_filename"
widget="image"
options="{'size': [200, 200]}"/>
<field name="fischer_image_filename"
invisible="1"/>
<div colspan="2" class="text-muted small">
Drop a PNG/JPEG of the coupon
under the XRF probe (export
from the XDAL 600 software's
Image menu). Rendered inline on
the printed CoC so the customer
sees the actual measurement.
</div>
</group>
<div class="alert alert-info"
role="alert"
invisible="not needs_thickness or not parsed_summary">