Initial commit

This commit is contained in:
gsinghpal
2026-02-22 01:22:18 -05:00
commit 5200d5baf0
2394 changed files with 386834 additions and 0 deletions

View File

@@ -0,0 +1,458 @@
# Fusion Accounting - OFX Bank Statement Parser
# Original implementation for Open Financial Exchange v1 (SGML) and v2 (XML)
# Based on the published OFX specification (https://www.ofx.net/spec)
import logging
import re
from datetime import datetime
from xml.etree import ElementTree
from odoo import _, models
from odoo.exceptions import UserError
_log = logging.getLogger(__name__)
class FusionOFXParser:
    """Standalone parser for OFX (Open Financial Exchange) files.

    Supports both OFX v1 (SGML-like markup without closing tags) and
    OFX v2 (well-formed XML).  The parser normalises either dialect into
    an ElementTree before extracting statement data.

    This is an **original** implementation written from the published
    OFX 1.6 / 2.2 specification — it is not derived from Odoo Enterprise.
    """

    # OFX date format: YYYYMMDDHHMMSS[.XXX[:TZ]] — timezone and fractional
    # seconds are optional; many banks only emit YYYYMMDD.
    _OFX_DATE_RE = re.compile(
        r'^(\d{4})(\d{2})(\d{2})'        # YYYYMMDD (required)
        r'(?:(\d{2})(\d{2})(\d{2}))?'    # HHMMSS   (optional)
        r'(?:\.\d+)?'                    # .XXX     (optional fractional)
        r'(?:\[.*\])?$'                  # [:TZ]    (optional timezone)
    )

    # Tags that are always scalar (leaf) elements in OFX v1.  The SGML
    # converter detects leaves structurally (an open tag followed by data),
    # so this set is only needed to recognise leaves whose value is EMPTY
    # and would otherwise look like an aggregate.
    _SGML_LEAF_TAGS = {
        'TRNTYPE', 'DTPOSTED', 'DTUSER', 'DTSTART', 'DTEND',
        'TRNAMT', 'FITID', 'CHECKNUM', 'REFNUM', 'NAME', 'MEMO',
        'PAYEEID', 'ACCTID', 'BANKID', 'BRANCHID', 'ACCTTYPE',
        'BALAMT', 'DTASOF', 'CURDEF', 'SEVERITY', 'CODE', 'MESSAGE',
        'SIC', 'CORRECTFITID', 'CORRECTACTION',
        'SRVRTID', 'CLRTID',
        'DTSERVER', 'LANGUAGE', 'TRNUID', 'ORG', 'FID', 'DTAVAIL',
        'ACCTKEY', 'MKTGINFO', 'EXTDNAME',
    }

    # Tokeniser for OFX v1 bodies: an open/close tag (dots and dashes are
    # allowed so vendor tags such as ``INTU.BID`` match), a run of
    # character data, or a stray ``<`` that is not part of a tag.
    _SGML_TOKEN_RE = re.compile(r'<(/?)([A-Za-z_][\w.\-]*)\s*>|([^<]+|<)')

    # -------------------------------------------------------------------
    # Public API
    # -------------------------------------------------------------------
    def parse_ofx(self, data_file):
        """Parse an OFX file (bytes or str) and return a list of statement
        dicts compatible with the Fusion Accounting import pipeline.

        Each dict has the keys:

        - ``name``            : statement identifier
        - ``date``            : closing date (datetime.date) or ``None``
        - ``balance_start``   : opening balance (float)
        - ``balance_end_real``: closing balance (float)
        - ``currency_code``   : upper-cased CURDEF value or ``None``
        - ``account_number``  : bank account number
        - ``transactions``    : list of transaction dicts

        Transaction dicts contain:

        - ``date``             : posting date (DTPOSTED, else DTUSER)
        - ``payment_ref``      : description / memo
        - ``ref``              : CHECKNUM, REFNUM or FITID
        - ``amount``           : signed float (negative = debit)
        - ``unique_import_id`` : FITID
        - ``transaction_type`` : OFX TRNTYPE value

        :raises UserError: if the file cannot be parsed or contains no
            bank / credit-card statement.
        """
        raw = self._to_text(data_file)
        # Determine OFX dialect and obtain an ElementTree root
        if self._is_ofx_v2(raw):
            root = self._parse_xml(raw)
        else:
            root = self._parse_sgml(raw)
        return self._extract_statements(root)

    # -------------------------------------------------------------------
    # Input normalisation
    # -------------------------------------------------------------------
    @staticmethod
    def _to_text(data_file):
        """Return *data_file* as ``str``, decoding bytes-like input.

        UTF-8 (with or without BOM) is tried first, then Windows-1252
        (the charset OFX v1 headers normally declare, ``CHARSET:1252``),
        and finally Latin-1, which accepts any byte sequence.  Non-bytes
        input is returned unchanged.
        """
        if not isinstance(data_file, (bytes, bytearray, memoryview)):
            return data_file
        payload = bytes(data_file)
        # 'utf-8-sig' also decodes BOM-less UTF-8, so a separate 'utf-8'
        # attempt would be redundant.
        for encoding in ('utf-8-sig', 'cp1252'):
            try:
                return payload.decode(encoding)
            except UnicodeDecodeError:
                continue
        return payload.decode('latin-1')

    @staticmethod
    def _is_ofx_v2(text):
        """Return True when *text* looks like OFX v2 (XML) rather than
        SGML-based v1.  OFX v2 begins with an XML declaration or a
        ``<?OFX …?>`` processing instruction."""
        # A BOM is not whitespace for ``str.lstrip()``; drop it explicitly.
        head = text.lstrip('\ufeff').lstrip()
        return head.startswith(('<?xml', '<?OFX'))

    # -------------------------------------------------------------------
    # OFX v2 (XML) parser
    # -------------------------------------------------------------------
    def _parse_xml(self, text):
        """Parse well-formed OFX v2 XML and return the root Element.

        :raises UserError: if the document is not well-formed XML.
        """
        try:
            # Feed the already-decoded ``str``: re-encoding it to UTF-8
            # bytes would contradict a declared ``encoding="ISO-8859-1"``
            # (or similar) and turn non-ASCII text into mojibake.
            return ElementTree.fromstring(text)
        except ElementTree.ParseError as exc:
            raise UserError(
                _("Failed to parse OFX XML file: %s", str(exc))
            ) from exc

    # -------------------------------------------------------------------
    # OFX v1 (SGML) parser — convert to XML then parse
    # -------------------------------------------------------------------
    def _parse_sgml(self, text):
        """Convert an OFX v1 SGML document into well-formed XML and
        return the root Element.

        The SGML dialect used by OFX v1 has two kinds of tags:

        * **Aggregate** tags like ``<STMTTRNRS>`` which contain child
          elements and have a matching ``</STMTTRNRS>``.
        * **Leaf** (data) tags like ``<TRNAMT>-42.50`` which carry a
          scalar value and are usually not closed.

        :raises UserError: if no ``<OFX>`` element is present or the
            converted document still is not well-formed.
        """
        # Strip the SGML headers (everything before the first ``<OFX>``).
        # A case-insensitive search on the original text avoids indexing
        # with an offset computed on ``text.upper()``, whose length can
        # differ from ``text`` for some characters.
        start = re.search(r'<OFX>', text, re.IGNORECASE)
        if start is None:
            raise UserError(_("The file does not contain a valid OFX document."))
        xml_text = self._sgml_to_xml(text[start.start():])
        try:
            return ElementTree.fromstring(xml_text)
        except ElementTree.ParseError as exc:
            _log.debug("SGML→XML conversion result:\n%s", xml_text[:2000])
            raise UserError(
                _("Failed to parse OFX v1 (SGML) file. The file may be "
                  "corrupt or in an unsupported dialect: %s", str(exc))
            ) from exc

    def _sgml_to_xml(self, body):
        """Rewrite an OFX v1 SGML *body* (starting at ``<OFX>``) as XML text.

        An open tag followed by data is a leaf and is closed right after
        its value; an open tag without data is an aggregate unless it is
        listed in ``_SGML_LEAF_TAGS``.  Explicit close tags for leaves are
        accepted, aggregates left open are closed when an ancestor closes,
        and conversion stops once the root element is closed.
        """
        out = []
        open_aggregates = []   # stack of aggregate tags awaiting a close tag
        pending = None         # last open tag, role (leaf/aggregate) undecided
        pending_text = []      # character data seen since ``pending`` opened

        def take_value():
            """Return and reset the data collected for the pending tag."""
            value = ''.join(pending_text).strip()
            pending_text.clear()
            return value

        def settle_pending():
            """Emit the pending open tag as a leaf or as an aggregate."""
            nonlocal pending
            if pending is None:
                return
            value = take_value()
            if value or pending in self._SGML_LEAF_TAGS:
                out.append(
                    f'<{pending}>{self._sgml_value_to_xml(value)}</{pending}>'
                )
            else:
                out.append(f'<{pending}>')
                open_aggregates.append(pending)
            pending = None

        for token in self._SGML_TOKEN_RE.finditer(body):
            closing, tag_name, text = token.groups()
            if tag_name is None:
                # Character data only matters right after an open tag.
                if pending is not None:
                    pending_text.append(text)
                continue
            tag_name = tag_name.upper()
            if not closing:
                settle_pending()
                pending = tag_name
                continue
            if pending == tag_name:
                # Explicitly closed element: ``<TAG>value</TAG>``.
                value = take_value()
                pending = None
                out.append(
                    f'<{tag_name}>{self._sgml_value_to_xml(value)}</{tag_name}>'
                )
            else:
                settle_pending()
                if tag_name in open_aggregates:
                    # Close the aggregate and anything left open inside it.
                    while open_aggregates:
                        closed = open_aggregates.pop()
                        out.append(f'</{closed}>')
                        if closed == tag_name:
                            break
                # Otherwise: close tag of an auto-closed leaf, or a stray
                # close tag — nothing to emit.
            if out and not open_aggregates:
                break  # root element closed; ignore trailing content
        settle_pending()
        out.extend(f'</{name}>' for name in reversed(open_aggregates))
        return '\n'.join(out)

    @classmethod
    def _sgml_value_to_xml(cls, value):
        """Return the SGML leaf *value* as XML-safe text.

        Entities that may already be present in the value are decoded
        first so they are not escaped a second time; ``&amp;`` is decoded
        last so ``&amp;lt;`` yields the literal text ``&lt;``.
        """
        for entity, char in (
            ('&lt;', '<'), ('&gt;', '>'), ('&nbsp;', ' '),
            ('&quot;', '"'), ('&apos;', "'"), ('&amp;', '&'),
        ):
            value = value.replace(entity, char)
        return cls._xml_escape(value)

    @staticmethod
    def _xml_escape(text):
        """Escape XML-special characters in *text*."""
        return (
            text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;')
                .replace("'", '&apos;')
        )

    # -------------------------------------------------------------------
    # Data extraction
    # -------------------------------------------------------------------
    def _extract_statements(self, root):
        """Walk the parsed OFX element tree and collect statement data.

        Bank statements (``STMTRS``) are returned first, followed by
        credit-card statements (``CCSTMTRS``); each response element is
        extracted exactly once.

        :raises UserError: if no statement is found.
        """
        statements = []
        for stmt_tag, acct_tag in (
            ('STMTRS', 'BANKACCTFROM'),
            ('CCSTMTRS', 'CCACCTFROM'),
        ):
            for stmtrs in self._find_all(root, stmt_tag):
                stmt = self._extract_single_statement(stmtrs, acct_tag)
                if stmt:
                    statements.append(stmt)
        if not statements:
            raise UserError(
                _("No bank or credit-card statements found in the OFX file.")
            )
        return statements

    def _extract_single_statement(self, stmtrs, acct_tag):
        """Extract one statement dict from a ``<STMTRS>`` or ``<CCSTMTRS>``
        element.  *acct_tag* is the preferred account aggregate name."""
        # Currency
        currency = self._find_text(stmtrs, 'CURDEF') or ''

        # Account number: preferred aggregate first, then either known one
        acct_number = ''
        for candidate in (acct_tag, 'BANKACCTFROM', 'CCACCTFROM'):
            acct_elem = self._find_first(stmtrs, candidate)
            if acct_elem is not None:
                acct_number = self._find_text(acct_elem, 'ACCTID') or ''
                break

        # Transaction list
        txn_list_el = self._find_first(stmtrs, 'BANKTRANLIST')
        if txn_list_el is None:
            txn_list_el = stmtrs  # CCSTMTRS may put transactions directly inside
        start_date = self._parse_ofx_date(self._find_text(txn_list_el, 'DTSTART'))
        end_date = self._parse_ofx_date(self._find_text(txn_list_el, 'DTEND'))
        transactions = []
        for stmttrn in self._find_all(txn_list_el, 'STMTTRN'):
            txn = self._extract_transaction(stmttrn)
            if txn:
                transactions.append(txn)

        # Closing balance: LEDGERBAL, or AVAILBAL when there is no LEDGERBAL
        balance_end = 0.0
        ledger_bal = self._find_first(stmtrs, 'LEDGERBAL')
        if ledger_bal is not None:
            balance_end = self._safe_float(self._find_text(ledger_bal, 'BALAMT'))
        else:
            avail_bal = self._find_first(stmtrs, 'AVAILBAL')
            if avail_bal is not None:
                balance_end = self._safe_float(self._find_text(avail_bal, 'BALAMT'))

        # Derive opening balance: opening = closing - sum(transactions)
        balance_start = balance_end - sum(t['amount'] for t in transactions)

        stmt_date = end_date or start_date
        stmt_name = f"OFX {acct_number}" if acct_number else "OFX Import"
        if stmt_date:
            stmt_name += f" {stmt_date.strftime('%Y-%m-%d')}"
        return {
            'name': stmt_name,
            'date': stmt_date,
            'balance_start': balance_start,
            'balance_end_real': balance_end,
            'currency_code': currency.upper() if currency else None,
            'account_number': acct_number,
            'transactions': transactions,
        }

    def _extract_transaction(self, stmttrn):
        """Extract a single transaction dict from a ``<STMTTRN>`` element."""
        trntype = self._find_text(stmttrn, 'TRNTYPE') or ''
        dt_posted = self._parse_ofx_date(self._find_text(stmttrn, 'DTPOSTED'))
        dt_user = self._parse_ofx_date(self._find_text(stmttrn, 'DTUSER'))
        amount = self._safe_float(self._find_text(stmttrn, 'TRNAMT'))
        fitid = self._find_text(stmttrn, 'FITID') or ''
        checknum = self._find_text(stmttrn, 'CHECKNUM') or ''
        refnum = self._find_text(stmttrn, 'REFNUM') or ''
        name = self._find_text(stmttrn, 'NAME') or ''
        memo = self._find_text(stmttrn, 'MEMO') or ''

        # Build description: prefer NAME, append MEMO if different
        description = name
        if memo and memo != name:
            description = f"{name} - {memo}" if name else memo

        # CHECKNUM or REFNUM serve as human-readable reference; FITID is
        # the fallback and also the unique import identifier.
        ref = checknum or refnum or fitid
        return {
            # The documented contract is the *posting* date; DTUSER (date
            # the user initiated the transaction) is only a fallback.
            'date': dt_posted or dt_user,
            'payment_ref': description or ref or '/',
            'ref': ref,
            'amount': amount,
            'unique_import_id': fitid,
            'transaction_type': trntype,
        }

    # -------------------------------------------------------------------
    # Element-tree helpers (case-insensitive tag search)
    # -------------------------------------------------------------------
    @staticmethod
    def _find_all(parent, tag):
        """Return *parent* and/or its descendants whose tag equals *tag*
        (case-insensitive), in document order."""
        tag_upper = tag.upper()
        return [
            el for el in parent.iter()
            if isinstance(el.tag, str) and el.tag.upper() == tag_upper
        ]

    @staticmethod
    def _find_first(parent, tag):
        """Return the first element (including *parent* itself) matching
        *tag* case-insensitively, or ``None``."""
        tag_upper = tag.upper()
        for el in parent.iter():
            if isinstance(el.tag, str) and el.tag.upper() == tag_upper:
                return el
        return None

    @classmethod
    def _find_text(cls, parent, tag):
        """Return stripped text content of the first element matching
        *tag*, or ``None`` when it is missing or empty."""
        el = cls._find_first(parent, tag)
        if el is not None and el.text:
            return el.text.strip()
        return None

    # -------------------------------------------------------------------
    # Date / numeric helpers
    # -------------------------------------------------------------------
    @classmethod
    def _parse_ofx_date(cls, date_str):
        """Parse an OFX date string (``YYYYMMDD…``) into a ``date``.

        The time of day and timezone suffix are ignored.  Returns ``None``
        (and logs a warning) for empty or unparseable input.
        """
        if not date_str:
            return None
        cleaned = date_str.strip()
        m = cls._OFX_DATE_RE.match(cleaned)
        if not m:
            # Fallback: try basic YYYYMMDD on the first eight characters
            try:
                return datetime.strptime(cleaned[:8], '%Y%m%d').date()
            except ValueError:
                _log.warning("Unparseable OFX date: %s", date_str)
                return None
        year, month, day = int(m.group(1)), int(m.group(2)), int(m.group(3))
        try:
            return datetime(year, month, day).date()
        except ValueError:
            _log.warning("Invalid OFX date components: %s", date_str)
            return None

    @staticmethod
    def _safe_float(value):
        """Convert *value* to float, returning 0.0 for empty / invalid.

        A comma is accepted as decimal separator.
        """
        if not value:
            return 0.0
        try:
            return float(value.replace(',', '.'))
        except (ValueError, AttributeError):
            return 0.0
class FusionJournalOFXImport(models.Model):
    """Register OFX as an available bank-statement import format and
    implement the parser hook on ``account.journal``."""

    _inherit = 'account.journal'

    # ---- Format Registration ----
    def _get_bank_statements_available_import_formats(self):
        """Append OFX to the list of importable formats."""
        formats = super()._get_bank_statements_available_import_formats()
        formats.append('OFX')
        return formats

    # ---- Parser Hook ----
    def _parse_bank_statement_file(self, attachment):
        """Attempt to parse *attachment* as OFX.  Falls through to
        ``super()`` when the file is not recognised as OFX.

        :returns: ``(currency_code, account_number, statements)`` where the
            first two values are taken from the first parsed statement.
        :raises UserError: if the file looks like OFX but cannot be parsed.
        """
        raw_data = attachment.raw
        if not self._is_ofx_file(raw_data):
            return super()._parse_bank_statement_file(attachment)

        parser = FusionOFXParser()
        try:
            statements = parser.parse_ofx(raw_data)
        except UserError:
            raise
        except Exception as exc:
            # Boundary handler: surface unexpected parser failures to the
            # user as a UserError while keeping the traceback in the log.
            _log.exception("OFX parsing error")
            raise UserError(
                _("Could not parse the OFX file: %s", str(exc))
            ) from exc

        # The import pipeline expects (currency_code, account_number, stmts)
        first = statements[0] if statements else {}
        return first.get('currency_code'), first.get('account_number'), statements

    # ---- Detection ----
    @staticmethod
    def _is_ofx_file(raw_data):
        """Heuristic check: does *raw_data* look like an OFX file?

        Only the first 4096 bytes (or characters, for non-bytes input) are
        inspected for an ``<?OFX`` instruction, an ``<OFX>`` root tag or
        the OFX v1 ``OFXHEADER:`` marker.
        """
        if isinstance(raw_data, (bytes, bytearray, memoryview)):
            # Slice before decoding so a large attachment is not decoded
            # in full; errors='ignore' also drops a multi-byte character
            # cut in half by the slice.
            text = bytes(raw_data[:4096]).decode('utf-8-sig', errors='ignore')
        else:
            # Non-bytes input (e.g. str, or False for an empty attachment)
            text = str(raw_data)[:4096]
        text_upper = text.upper()
        return any(
            marker in text_upper
            for marker in ('<?OFX', '<OFX>', 'OFXHEADER:')
        )