379 lines
13 KiB
Python
379 lines
13 KiB
Python
# Fusion Accounting - QIF Bank Statement Parser
|
|
# Original implementation for Quicken Interchange Format files
|
|
# Based on the published QIF specification
|
|
|
|
import logging
|
|
import re
|
|
from datetime import datetime
|
|
|
|
from odoo import _, models
|
|
from odoo.exceptions import UserError
|
|
|
|
# Module-level logger, named after this module, used by the QIF parser and
# the journal import hook below.
_log = logging.getLogger(__name__)
|
|
|
|
|
|
class FusionQIFParser:
    """Standalone parser for QIF (Quicken Interchange Format) files.

    QIF is a plain-text format where each field occupies its own line,
    prefixed by a single-character code:

        D   Date of the transaction
        T   Amount (net)
        U   Amount (duplicate field, same meaning as T)
        P   Payee name
        N   Check number or reference
        M   Memo / description
        L   Category or transfer account
        A   Address line (up to 6 lines)
        C   Cleared status (*/c/X/R)
        ^   End-of-record separator

    Sections are introduced by a ``!Type:`` header line.

    This is an **original** implementation written from the published
    QIF specification — it is not derived from Odoo Enterprise.
    """

    # Supported QIF date layouts, in order of preference (US mm/dd/yyyy is
    # most common, but dd/mm/yyyy and yyyy-mm-dd also appear in the wild).
    # Quicken's apostrophe-year notation (1/31'2025) needs no entry of its
    # own: ``_normalise_date`` rewrites the apostrophe to the regular
    # separator before any of these layouts is tried.
    _DATE_FORMATS = [
        '%m/%d/%Y',   # 01/31/2025
        '%m/%d/%y',   # 01/31/25
        '%m-%d-%Y',   # 01-31-2025
        '%m-%d-%y',   # 01-31-25
        '%d/%m/%Y',   # 31/01/2025
        '%d/%m/%y',   # 31/01/25
        '%d-%m-%Y',   # 31-01-2025
        '%d-%m-%y',   # 31-01-25
        '%Y-%m-%d',   # 2025-01-31
        '%Y/%m/%d',   # 2025/01/31
    ]

    # Field codes that feed the transaction dict, mapped to the internal key
    # they are stored under.  ``U`` is handled separately (fallback for
    # ``T``).  Every other code (L, C, A and the split markers S, E, $, %)
    # is not needed to build a statement line and is ignored.
    _FIELD_KEYS = {
        'D': 'date_str',
        'T': 'amount',
        'P': 'payee',
        'N': 'number',
        'M': 'memo',
    }

    # -------------------------------------------------------------------
    # Public API
    # -------------------------------------------------------------------

    def parse_qif(self, data_file):
        """Parse a QIF file and return a statement dict compatible with
        the Fusion Accounting import pipeline.

        :param data_file: file content as ``bytes`` or ``str``
        :raises UserError: when the file holds no record, or no record
            with a usable amount

        Returns a **single** dict (QIF files describe one account):
            - ``name``            : generated statement identifier
            - ``date``            : last transaction date (or ``None``)
            - ``balance_start``   : 0.0  (QIF does not carry balances)
            - ``balance_end_real``: 0.0
            - ``account_type``    : value of the first ``!Type:`` header
            - ``transactions``    : list of transaction dicts

        Transaction dicts contain:
            - ``date``            : transaction date (datetime.date), or
                                    ``None`` when it could not be parsed
            - ``payment_ref``     : payee / memo
            - ``ref``             : check number / reference
            - ``amount``          : signed float
            - ``unique_import_id``: generated key, unique within the file
        """
        lines = self._to_text(data_file).splitlines()

        # Detect account type from the header (optional)
        account_type = self._detect_account_type(lines)

        # Split the record stream at ``^`` separators
        records = self._split_records(lines)
        if not records:
            raise UserError(
                _("The QIF file contains no transaction records.")
            )

        # Pick ONE date layout for the whole file.  Deciding per record
        # would read 05/01/2025 as May 1st and 31/01/2025 as January 31st
        # inside the same dd/mm export.
        date_format = self._detect_date_format(
            line[1:] for rec in records for line in rec if line[:1] == 'D'
        )

        transactions = []
        used_ids = set()
        for idx, rec in enumerate(records):
            txn = self._parse_record(rec, idx, date_format=date_format)
            if not txn:
                continue
            # Two identical same-day transactions would otherwise share an
            # import key; keep the first untouched and suffix the others.
            txn['unique_import_id'] = self._reserve_unique_id(
                txn['unique_import_id'], used_ids
            )
            transactions.append(txn)

        if not transactions:
            raise UserError(
                _("No valid transactions could be extracted from the QIF file.")
            )

        # Build statement metadata
        dates = [t['date'] for t in transactions if t['date']]
        last_date = max(dates) if dates else None
        stmt_name = "QIF Import"
        if last_date:
            stmt_name = f"QIF {last_date.strftime('%Y-%m-%d')}"

        return {
            'name': stmt_name,
            'date': last_date,
            'balance_start': 0.0,
            'balance_end_real': 0.0,
            'account_type': account_type,
            'transactions': transactions,
        }

    @staticmethod
    def _reserve_unique_id(base_id, used_ids):
        """Return *base_id*, or *base_id* with a ``-2``, ``-3``, ... suffix
        when it is already present in the set *used_ids*; the returned
        value is added to *used_ids*."""
        candidate = base_id
        occurrence = 1
        while candidate in used_ids:
            occurrence += 1
            candidate = f"{base_id}-{occurrence}"
        used_ids.add(candidate)
        return candidate

    # -------------------------------------------------------------------
    # Text handling
    # -------------------------------------------------------------------

    @staticmethod
    def _to_text(data_file):
        """Ensure *data_file* is a string.

        Bytes are decoded as UTF-8 (a leading BOM is dropped) and, when
        that fails, as Latin-1, which accepts any byte sequence.  Anything
        that is not bytes is returned unchanged.
        """
        if not isinstance(data_file, (bytes, bytearray)):
            return data_file
        try:
            # 'utf-8-sig' also decodes BOM-less UTF-8, so a separate
            # 'utf-8' attempt would be redundant.
            return data_file.decode('utf-8-sig')
        except UnicodeDecodeError:
            return data_file.decode('latin-1')

    # -------------------------------------------------------------------
    # Account-type detection
    # -------------------------------------------------------------------

    @staticmethod
    def _detect_account_type(lines):
        """Return the QIF account type from the first ``!Type:`` header,
        or ``'Bank'`` as the default."""
        for line in lines:
            stripped = line.strip()
            if stripped.upper().startswith('!TYPE:'):
                return stripped[len('!TYPE:'):].strip()
        return 'Bank'

    # -------------------------------------------------------------------
    # Record splitting
    # -------------------------------------------------------------------

    @staticmethod
    def _split_records(lines):
        """Split *lines* into a list of record-lists, using ``^`` as the
        record separator.  Blank lines and header lines (``!``) are
        skipped; a trailing record without a final ``^`` is kept."""
        records = []
        current = []
        for line in lines:
            stripped = line.strip()
            # Blank lines and header / type declarations carry no field
            if not stripped or stripped.startswith('!'):
                continue
            if stripped != '^':
                current.append(stripped)
            elif current:
                records.append(current)
                current = []
        if current:
            records.append(current)
        return records

    # -------------------------------------------------------------------
    # Single-record parsing
    # -------------------------------------------------------------------

    def _parse_record(self, field_lines, record_index, date_format=None):
        """Parse a list of single-char-prefixed field lines into a
        transaction dict.

        :param field_lines: stripped, non-empty lines of one record
        :param record_index: position of the record in the file; used in
            the import key when the date or the payee/memo is missing
        :param date_format: ``strptime`` layout to try first for the date
        :return: the transaction dict, or ``None`` when the record has no
            parseable amount
        """
        fields = {}
        for line in field_lines:
            if not line:
                continue
            code, value = line[0], line[1:].strip()
            if code == 'U':
                # Duplicate amount field — use only if T is missing
                fields.setdefault('amount', value)
            elif code in self._FIELD_KEYS:
                fields[self._FIELD_KEYS[code]] = value

        # Amount is mandatory
        amount = self._parse_amount(fields.get('amount', ''))
        if amount is None:
            _log.debug(
                "Skipping QIF record %s: no usable amount", record_index
            )
            return None

        txn_date = self._parse_qif_date(
            fields.get('date_str', ''), date_format
        )
        payee = fields.get('payee', '')
        memo = fields.get('memo', '')
        number = fields.get('number', '')

        # Build description
        description = payee
        if memo and memo != payee:
            description = f"{payee} - {memo}" if payee else memo

        # Generate an import ID from available data
        unique_parts = [
            txn_date.isoformat() if txn_date else str(record_index),
            str(amount),
            payee or memo or str(record_index),
        ]
        if number:
            unique_parts.append(number)

        return {
            'date': txn_date,
            'payment_ref': description or number or '/',
            'ref': number,
            'amount': amount,
            'unique_import_id': 'QIF-' + '-'.join(unique_parts),
        }

    # -------------------------------------------------------------------
    # Date parsing
    # -------------------------------------------------------------------

    @staticmethod
    def _normalise_date(date_str):
        """Rewrite Quicken date quirks into a plain separated date.

        - apostrophe-year notation: ``1/31'2025`` → ``1/31/2025``
        - space-padded components:  ``1/ 5' 7``   → ``1/05/07``

        Returns ``''`` for empty input.
        """
        text = (date_str or '').strip()
        if not text:
            return ''
        separator = '-' if '-' in text else '/'
        text = text.replace("'", separator)
        # Zero-fill single digits that Quicken padded with spaces
        text = re.sub(r'(?<=[/\-])\s+(\d)(?!\d)', r'0\1', text)
        return re.sub(r'\s+', '', text)

    @classmethod
    def _detect_date_format(cls, date_strings):
        """Return the entry of ``_DATE_FORMATS`` that parses the largest
        number of distinct *date_strings*, or ``None`` when none parses.

        Ties go to the earliest entry, so files made only of ambiguous
        dates (day <= 12) keep the US-first reading.
        """
        samples = {cls._normalise_date(raw) for raw in date_strings}
        samples.discard('')

        best_format, best_hits = None, 0
        for fmt in cls._DATE_FORMATS:
            hits = 0
            for sample in samples:
                try:
                    datetime.strptime(sample, fmt)
                except ValueError:
                    continue
                hits += 1
            if hits > best_hits:
                best_format, best_hits = fmt, hits
        return best_format

    @classmethod
    def _parse_qif_date(cls, date_str, preferred_format=None):
        """Try multiple date formats and return the first successful
        parse as a ``datetime.date``, or ``None``.

        :param date_str: raw value of a ``D`` field
        :param preferred_format: layout tried before ``_DATE_FORMATS``
            (typically the file-wide layout from ``_detect_date_format``)
        """
        normalised = cls._normalise_date(date_str)
        if not normalised:
            return None

        formats = cls._DATE_FORMATS
        if preferred_format:
            formats = [preferred_format] + [
                fmt for fmt in formats if fmt != preferred_format
            ]

        for fmt in formats:
            try:
                return datetime.strptime(normalised, fmt).date()
            except ValueError:
                continue

        _log.warning("Unparseable QIF date: %s", date_str)
        return None

    # -------------------------------------------------------------------
    # Amount parsing
    # -------------------------------------------------------------------

    @staticmethod
    def _parse_amount(raw):
        """Parse a QIF amount string into a float, or ``None``.

        Handles commas as thousand separators or as decimal separators
        (European style), repeated periods as thousand separators
        (``1.234.567``), and the accounting notations for negative
        values: ``(12.50)`` and a trailing sign (``12.50-``).
        """
        if not raw:
            return None
        text = raw.strip()
        parenthesised = text.startswith('(') and text.endswith(')')

        # Remove currency symbols and whitespace
        cleaned = re.sub(r'[^\d.,\-+]', '', text)
        if not cleaned:
            return None

        # Move a trailing sign to the front so float() accepts it
        if len(cleaned) > 1 and cleaned[-1] in '+-' and cleaned[0] not in '+-':
            cleaned = cleaned[-1] + cleaned[:-1]

        # Determine decimal separator heuristic:
        if ',' in cleaned and '.' in cleaned:
            # Both present: the last one is the decimal separator
            if cleaned.rfind(',') > cleaned.rfind('.'):
                # European: 1.234,56
                cleaned = cleaned.replace('.', '').replace(',', '.')
            else:
                # US: 1,234.56
                cleaned = cleaned.replace(',', '')
        elif ',' in cleaned:
            # Could be thousand separator (1,234) or decimal (1,23)
            parts = cleaned.split(',')
            if len(parts) == 2 and len(parts[1]) <= 2:
                cleaned = cleaned.replace(',', '.')
            else:
                cleaned = cleaned.replace(',', '')
        elif cleaned.count('.') > 1:
            # Several periods can only be thousand separators
            cleaned = cleaned.replace('.', '')

        try:
            value = float(cleaned)
        except ValueError:
            return None
        return -abs(value) if parenthesised else value
|
|
|
|
|
|
class FusionJournalQIFImport(models.Model):
    """Register QIF as an available bank-statement import format and
    implement the parser hook on ``account.journal``."""

    _inherit = 'account.journal'

    # ---- Format Registration ----
    def _get_bank_statements_available_import_formats(self):
        """Append QIF to the list of importable formats returned by
        ``super()``."""
        formats = super()._get_bank_statements_available_import_formats()
        formats.append('QIF')
        return formats

    # ---- Parser Hook ----
    def _parse_bank_statement_file(self, attachment):
        """Attempt to parse *attachment* as QIF.  Falls through to
        ``super()`` when the file is not recognised as QIF.

        :param attachment: record whose ``raw`` attribute holds the file
            content
        :return: ``(currency_code, account_number, statements)``; the first
            two are always ``None`` for QIF and ``statements`` is a
            one-element list
        :raises UserError: when the file looks like QIF but cannot be
            parsed
        """
        raw_data = attachment.raw
        if not self._is_qif_file(raw_data):
            return super()._parse_bank_statement_file(attachment)

        parser = FusionQIFParser()
        try:
            stmt = parser.parse_qif(raw_data)
        except UserError:
            # Already a user-facing message: let it through untouched
            raise
        except Exception as exc:
            _log.exception("QIF parsing error")
            raise UserError(
                _("Could not parse the QIF file: %s", str(exc))
            ) from exc

        # QIF does not carry account-number or currency metadata
        currency_code = None
        account_number = None

        # Wrap the single statement in a list for the pipeline
        return currency_code, account_number, [stmt]

    # ---- Detection ----
    @staticmethod
    def _is_qif_file(raw_data):
        """Heuristic check: does *raw_data* look like a QIF file?

        Only the first 2048 bytes/characters are inspected.  The file is
        accepted when it opens with a QIF header directive, or when the
        sample holds a ``^`` separator together with a ``D`` (date) and a
        ``T`` (amount) field line.
        """
        try:
            # Slice before decoding: only the head of the file is needed
            text = raw_data[:2048].decode('utf-8-sig', errors='ignore')
        except (UnicodeDecodeError, AttributeError, TypeError):
            # Not bytes (already text, or an empty/False attachment)
            text = str(raw_data)[:2048]

        # QIF files almost always start with a header directive.  The
        # account directive is written ``!Account`` (no colon), and full
        # Quicken exports may open with ``!Option:`` / ``!Clear:`` lines.
        header_prefixes = ('!TYPE:', '!ACCOUNT', '!OPTION:', '!CLEAR:')
        if text.strip().upper().startswith(header_prefixes):
            return True

        # Fallback: look for the ``^`` separator combined with D/T field codes
        if '^' not in text:
            return False
        has_date_field = re.search(r'^D\d', text, re.MULTILINE) is not None
        has_amount_field = re.search(r'^T[\d\-+]', text, re.MULTILINE) is not None
        return has_date_field and has_amount_field
|