# Odoo-Modules/Fusion Accounting/models/bank_statement_import_qif.py

# Fusion Accounting - QIF Bank Statement Parser
# Original implementation for Quicken Interchange Format files
# Based on the published QIF specification

import logging
import re
from datetime import datetime

from odoo import _, models
from odoo.exceptions import UserError

_log = logging.getLogger(__name__)


class FusionQIFParser:
    """Standalone parser for QIF (Quicken Interchange Format) files.

    QIF is a plain-text format where each field occupies its own line,
    prefixed by a single-character code:

        D   Date of the transaction
        T   Amount (net)
        U   Amount (duplicate field, same meaning as T)
        P   Payee name
        N   Check number or reference
        M   Memo / description
        L   Category or transfer account
        A   Address line (up to 6 lines)
        C   Cleared status (*/c/X/R)
        ^   End-of-record separator

    Sections are introduced by a ``!Type:`` header line.

    This is an **original** implementation written from the published
    QIF specification — it is not derived from Odoo Enterprise.
    """

    # Supported QIF date formats (US mm/dd/yyyy is most common, but
    # dd/mm/yyyy and yyyy-mm-dd also appear in the wild).  Order matters:
    # it is the priority used when a date string is ambiguous.
    # NOTE: the two apostrophe formats at the end never match in practice
    # because apostrophes are normalised to ``/`` before parsing; they are
    # kept so the list itself stays backward-compatible.
    _DATE_FORMATS = [
        '%m/%d/%Y',      # 01/31/2025
        '%m/%d/%y',      # 01/31/25
        '%m-%d-%Y',      # 01-31-2025
        '%m-%d-%y',      # 01-31-25
        '%d/%m/%Y',      # 31/01/2025
        '%d/%m/%y',      # 31/01/25
        '%d-%m-%Y',      # 31-01-2025
        '%d-%m-%y',      # 31-01-25
        '%Y-%m-%d',      # 2025-01-31
        '%Y/%m/%d',      # 2025/01/31
        "%m/%d'%Y",      # 1/31'2025  (Quicken short-year)
        "%m/%d'%y",      # 1/31'25
    ]

    # Single-value field codes and the key each one is stored under.
    # ``A`` (address) is multi-valued and handled separately.
    _FIELD_KEYS = {
        'D': 'date_str',
        'T': 'amount',
        'U': 'amount_alt',
        'P': 'payee',
        'N': 'number',
        'M': 'memo',
        'L': 'category',
        'C': 'cleared',
    }

    # -------------------------------------------------------------------
    # Public API
    # -------------------------------------------------------------------

    def parse_qif(self, data_file):
        """Parse a QIF file and return a statement dict compatible with
        the Fusion Accounting import pipeline.

        :param data_file: file content as ``bytes`` (also ``bytearray`` /
            ``memoryview``) or ``str``.
        :raises UserError: when the file holds no records, or when no
            record yields a valid transaction.

        Returns a **single** dict (QIF files describe one account):
            - ``name``            : generated statement identifier
            - ``date``            : last transaction date
            - ``balance_start``   : 0.0 (QIF does not carry balances)
            - ``balance_end_real``: 0.0
            - ``account_type``    : value of the first ``!Type:`` header
            - ``transactions``    : list of transaction dicts

        Transaction dicts contain:
            - ``date``              : transaction date (datetime.date)
            - ``payment_ref``       : payee / memo
            - ``ref``               : check number / reference
            - ``amount``            : signed float
            - ``unique_import_id``  : generated unique key
        """
        text = self._to_text(data_file)
        lines = text.splitlines()

        # Detect account type from the header (optional)
        account_type = self._detect_account_type(lines)

        # Split the record stream at ``^`` separators
        records = self._split_records(lines)

        if not records:
            raise UserError(
                _("The QIF file contains no transaction records.")
            )

        # Pick ONE date format for the whole file.  Guessing per record
        # would read "13/01/2025" as dd/mm but "05/06/2025" as mm/dd
        # within the same file.
        date_format = self._detect_date_format(records)

        transactions = []
        for idx, rec in enumerate(records):
            txn = self._parse_record(rec, idx, date_format=date_format)
            if txn:
                transactions.append(txn)

        if not transactions:
            raise UserError(
                _("No valid transactions could be extracted from the QIF file.")
            )

        # Two identical purchases on the same day would otherwise share
        # one "unique" key.
        self._deduplicate_import_ids(transactions)

        # Build statement metadata
        dates = [t['date'] for t in transactions if t.get('date')]
        last_date = max(dates) if dates else None

        stmt_name = "QIF Import"
        if last_date:
            stmt_name = f"QIF {last_date.strftime('%Y-%m-%d')}"

        return {
            'name': stmt_name,
            'date': last_date,
            'balance_start': 0.0,
            'balance_end_real': 0.0,
            'account_type': account_type,
            'transactions': transactions,
        }

    # -------------------------------------------------------------------
    # Text handling
    # -------------------------------------------------------------------

    @staticmethod
    def _to_text(data_file):
        """Return *data_file* as text.

        Bytes-like input is decoded as UTF-8 (a BOM is dropped) with a
        latin-1 fallback, which accepts any byte sequence.  A leading BOM
        is also removed from ``str`` input so the first header line is
        still recognised.  Any other type is returned unchanged.
        """
        if isinstance(data_file, (bytearray, memoryview)):
            data_file = bytes(data_file)
        if isinstance(data_file, bytes):
            try:
                return data_file.decode('utf-8-sig')
            except UnicodeDecodeError:
                return data_file.decode('latin-1')
        if isinstance(data_file, str) and data_file.startswith('\ufeff'):
            return data_file[1:]
        return data_file

    # -------------------------------------------------------------------
    # Account-type detection
    # -------------------------------------------------------------------

    @staticmethod
    def _detect_account_type(lines):
        """Return the QIF account type from the first ``!Type:`` header,
        or ``'Bank'`` as the default."""
        for line in lines:
            stripped = line.strip()
            if stripped.upper().startswith('!TYPE:'):
                return stripped[6:].strip()
        return 'Bank'

    # -------------------------------------------------------------------
    # Record splitting
    # -------------------------------------------------------------------

    @staticmethod
    def _split_records(lines):
        """Split *lines* into a list of record-lists, using ``^`` as the
        record separator.  Blank lines and header lines (``!``) are
        skipped; every kept line is stripped of surrounding whitespace."""
        records = []
        current = []
        for line in lines:
            stripped = line.strip()
            if not stripped or stripped.startswith('!'):
                continue
            if stripped == '^':
                if current:
                    records.append(current)
                    current = []
            else:
                current.append(stripped)
        # Trailing record without final ``^``
        if current:
            records.append(current)
        return records

    # -------------------------------------------------------------------
    # Single-record parsing
    # -------------------------------------------------------------------

    @classmethod
    def _read_fields(cls, field_lines):
        """Collect the raw field values of one record into a dict keyed
        by the names in ``_FIELD_KEYS`` (plus ``address``).  When a code
        repeats, the last value wins; address lines are joined with
        ``', '``."""
        fields = {}
        address_lines = []
        for line in field_lines:
            if not line:
                continue
            code, value = line[0], line[1:].strip()
            if code == 'A':
                address_lines.append(value)
                continue
            key = cls._FIELD_KEYS.get(code)
            if key is not None:
                fields[key] = value
            # Other codes (S, E, $, %) are split-transaction markers;
            # they are uncommon in bank exports and are ignored here.
        if address_lines:
            fields['address'] = ', '.join(address_lines)
        return fields

    def _parse_record(self, field_lines, record_index, date_format=None):
        """Parse a list of single-char-prefixed field lines into a
        transaction dict.

        :param field_lines: stripped lines of one record.
        :param record_index: position of the record in the file; used as
            a fallback component of the unique import id.
        :param date_format: optional ``strptime`` format to try first
            (normally the file-wide format chosen by ``parse_qif``).
        :return: transaction dict, or ``None`` when the record carries
            no usable amount.
        """
        fields = self._read_fields(field_lines)

        # Amount is mandatory.  ``T`` is authoritative; ``U`` is used
        # when ``T`` is missing, empty or unparseable.
        amount = self._parse_amount(fields.get('amount', ''))
        if amount is None:
            amount = self._parse_amount(fields.get('amount_alt', ''))
        if amount is None:
            if fields.get('amount') or fields.get('amount_alt'):
                # An amount was present but unreadable: dropping the
                # record is data loss, so leave a trace.
                _log.warning(
                    "Skipping QIF record %s: unparseable amount %r",
                    record_index,
                    fields.get('amount') or fields.get('amount_alt'),
                )
            return None

        txn_date = self._parse_qif_date(
            fields.get('date_str', ''), date_format
        )
        payee = fields.get('payee', '')
        memo = fields.get('memo', '')
        number = fields.get('number', '')

        # Build description
        description = payee
        if memo and memo != payee:
            description = f"{payee} - {memo}" if payee else memo

        # Generate a unique import ID from available data
        unique_parts = [
            txn_date.isoformat() if txn_date else str(record_index),
            str(amount),
            payee or memo or str(record_index),
        ]
        if number:
            unique_parts.append(number)
        unique_id = 'QIF-' + '-'.join(unique_parts)

        return {
            'date': txn_date,
            'payment_ref': description or number or '/',
            'ref': number,
            'amount': amount,
            'unique_import_id': unique_id,
        }

    @staticmethod
    def _deduplicate_import_ids(transactions):
        """Make every ``unique_import_id`` in *transactions* distinct,
        in place.

        The first occurrence of an id is left untouched; later
        occurrences get a ``#2``, ``#3``, ... suffix.  The suffix depends
        only on the order of records in the file, so re-parsing the same
        file yields the same ids.
        """
        used = set()
        occurrences = {}
        for txn in transactions:
            base = txn['unique_import_id']
            candidate = base
            while candidate in used:
                occurrences[base] = occurrences.get(base, 1) + 1
                candidate = f"{base}#{occurrences[base]}"
            used.add(candidate)
            txn['unique_import_id'] = candidate

    # -------------------------------------------------------------------
    # Date parsing
    # -------------------------------------------------------------------

    @staticmethod
    def _normalise_date(date_str):
        """Return *date_str* in a form ``strptime`` can digest.

        The Quicken apostrophe-year notation is rewritten
        (``1/31'2025`` → ``1/31/2025``) and blank padding inside the
        date is turned into zero padding (``1/ 5' 5`` → ``1/05/05``).
        """
        return re.sub(r'\s', '0', date_str.strip().replace("'", "/"))

    @staticmethod
    def _strptime_or_none(text, fmt):
        """Return *text* parsed with *fmt* as a ``datetime.date``, or
        ``None`` when it does not match."""
        try:
            return datetime.strptime(text, fmt).date()
        except ValueError:
            return None

    @classmethod
    def _detect_date_format(cls, records):
        """Return the first entry of ``_DATE_FORMATS`` that parses the
        date of **every** amount-bearing record, or ``None`` when the
        file has no dates or no single format fits them all (callers
        then fall back to per-record guessing)."""
        samples = set()
        for rec in records:
            fields = cls._read_fields(rec)
            has_amount = fields.get('amount') or fields.get('amount_alt')
            if has_amount and fields.get('date_str'):
                samples.add(cls._normalise_date(fields['date_str']))
        if not samples:
            return None
        for fmt in cls._DATE_FORMATS:
            if all(
                cls._strptime_or_none(sample, fmt) is not None
                for sample in samples
            ):
                return fmt
        return None

    @classmethod
    def _parse_qif_date(cls, date_str, preferred_format=None):
        """Parse *date_str* into a ``datetime.date``.

        *preferred_format*, when given, is tried first; the remaining
        ``_DATE_FORMATS`` are then tried in order.  Returns ``None`` (and
        logs a warning) when nothing matches, or ``None`` silently for
        an empty string.
        """
        if not date_str:
            return None

        normalised = cls._normalise_date(date_str)

        formats = cls._DATE_FORMATS
        if preferred_format:
            formats = [preferred_format] + [
                fmt for fmt in formats if fmt != preferred_format
            ]

        for fmt in formats:
            parsed = cls._strptime_or_none(normalised, fmt)
            if parsed is not None:
                return parsed

        _log.warning("Unparseable QIF date: %s", date_str)
        return None

    # -------------------------------------------------------------------
    # Amount parsing
    # -------------------------------------------------------------------

    @staticmethod
    def _parse_amount(raw):
        """Parse a QIF amount string into a float, or return ``None``
        when it is empty or not numeric.

        Handles commas and periods both as thousand separators and as
        decimal separators (US ``1,234.56`` and European ``1.234,56``).
        """
        if not raw:
            return None
        # Remove currency symbols and whitespace
        cleaned = re.sub(r'[^\d.,\-+]', '', raw)
        if not cleaned:
            return None

        has_comma = ',' in cleaned
        has_period = '.' in cleaned

        if has_comma and has_period:
            # Both present: whichever comes last is the decimal separator.
            if cleaned.rfind(',') > cleaned.rfind('.'):
                # European: 1.234,56
                cleaned = cleaned.replace('.', '').replace(',', '.')
            else:
                # US: 1,234.56
                cleaned = cleaned.replace(',', '')
        elif has_comma:
            # Could be thousand separator (1,234) or decimal (1,23)
            parts = cleaned.split(',')
            if len(parts) == 2 and len(parts[1]) <= 2:
                # Likely decimal separator
                cleaned = cleaned.replace(',', '.')
            else:
                # Likely thousand separator
                cleaned = cleaned.replace(',', '')
        elif cleaned.count('.') > 1:
            # Several periods cannot all be decimal points: 1.234.567
            cleaned = cleaned.replace('.', '')

        try:
            return float(cleaned)
        except ValueError:
            return None


class FusionJournalQIFImport(models.Model):
    """Extension of ``account.journal`` that advertises QIF as a
    bank-statement import format and plugs the QIF parser into the
    statement-file parsing hook."""

    _inherit = 'account.journal'

    # ---- Format Registration ----
    def _get_bank_statements_available_import_formats(self):
        """Return the parent's list of importable formats with ``'QIF'``
        appended to it."""
        available = super()._get_bank_statements_available_import_formats()
        available.append('QIF')
        return available

    # ---- Parser Hook ----
    def _parse_bank_statement_file(self, attachment):
        """Parse *attachment* as QIF when it looks like a QIF file;
        otherwise delegate to ``super()``.

        :return: ``(currency_code, account_number, [statement])`` where
            the first two items are ``None`` because QIF carries neither
            currency nor account-number metadata.
        :raises UserError: re-raised as-is from the parser, or wrapping
            any other exception the parser raised.
        """
        payload = attachment.raw
        if self._is_qif_file(payload):
            qif_parser = FusionQIFParser()
            try:
                statement = qif_parser.parse_qif(payload)
            except UserError:
                # Already user-facing: let it through untouched.
                raise
            except Exception as exc:
                _log.exception("QIF parsing error")
                raise UserError(
                    _("Could not parse the QIF file: %s", str(exc))
                ) from exc

            # The pipeline expects a list of statements, hence the
            # one-element list.
            return None, None, [statement]

        return super()._parse_bank_statement_file(attachment)

    # ---- Detection ----
    @staticmethod
    def _is_qif_file(raw_data):
        """Heuristically decide whether *raw_data* looks like QIF.

        Only the first 2048 characters are inspected.  The content is
        accepted when it begins with a ``!Type:`` / ``!Account:`` header
        (case-insensitive), or when it contains a ``^`` separator
        together with both a ``D<digit>`` line and a ``T<digit or sign>``
        line.
        """
        try:
            decoded = raw_data.decode('utf-8-sig', errors='ignore')
        except (UnicodeDecodeError, AttributeError):
            # Not bytes-like (e.g. already a string): use its str() form.
            decoded = str(raw_data)
        sample = decoded[:2048]

        # QIF files almost always open with one of these headers.
        leading = sample.upper().strip()
        if leading.startswith(('!TYPE:', '!ACCOUNT:')):
            return True

        # Header-less fallback: needs the record separator plus
        # date and amount field lines.
        if '^' not in sample:
            return False
        found_date = re.search(r'^D\d', sample, re.MULTILINE) is not None
        found_amount = re.search(r'^T[\d\-+]', sample, re.MULTILINE) is not None
        return found_date and found_amount