"""Utility functions for parsing financial statement data."""

import logging
import math
import re
from datetime import datetime
from typing import Optional

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# strptime layouts accepted by normalize_date, which tries them in the order
# listed here and returns the first one that parses. %Y only matches a
# four-digit year, so two-digit years fall through to the %y layouts.
SUPPORTED_DATE_FORMATS = [
    "%m/%d/%Y",  # 01/15/2024
    "%m/%d/%y",  # 01/15/24
    "%m-%d-%Y",  # 01-15-2024
    "%m-%d-%y",  # 01-15-24
    "%Y-%m-%d",  # 2024-01-15 (ISO format)
]


def normalize_date(date_str: str) -> Optional[str]:
    """Return date_str rewritten as an ISO date (YYYY-MM-DD).

    Each layout in SUPPORTED_DATE_FORMATS is attempted in turn and the first
    one that parses wins. Accepted shapes:
    - MM/DD/YYYY and MM/DD/YY (leading zeros optional)
    - MM-DD-YYYY and MM-DD-YY
    - YYYY-MM-DD (already ISO; returned in normalized form)

    Args:
        date_str: Date text in any of the supported layouts; surrounding
            whitespace is ignored

    Returns:
        The date as YYYY-MM-DD, or None (after logging a warning) when no
        supported layout matches
    """
    date_str = date_str.strip()

    for layout in SUPPORTED_DATE_FORMATS:
        try:
            return datetime.strptime(date_str, layout).strftime("%Y-%m-%d")
        except ValueError:
            # This layout does not fit the text; try the next one.
            pass

    logger.warning(f"Could not parse date: {date_str}")
    return None


def normalize_date_with_year(date_str: str, default_year: int) -> Optional[str]:
    """Convert a date to ISO format, supplying the year when it is absent.

    Statement rows often carry only MM/DD. When the input has exactly that
    shape, default_year is appended before the text is handed to
    normalize_date; any other input is passed through unchanged.

    Args:
        date_str: Date text such as "01/15" or "01/15/2024"
        default_year: Year appended when date_str is a bare MM/DD

    Returns:
        The date as YYYY-MM-DD, or None if normalize_date cannot parse it
    """
    trimmed = date_str.strip()

    # A bare month/day pair has no year component to parse.
    lacks_year = re.match(r"^\d{1,2}/\d{1,2}$", trimmed) is not None
    candidate = f"{trimmed}/{default_year}" if lacks_year else trimmed

    return normalize_date(candidate)


def parse_amount(amount_str: str) -> Optional[float]:
    """Parse amount string to float, handling currency symbols and signs.

    Handles:
    - Currency symbols ($)
    - Commas in numbers (1,234.56)
    - Parentheses for negatives: ($100.00) -> -100.00
    - Trailing minus for credits: $100.00- -> -100.00
    - Leading minus: -$100.00 -> -100.00

    When more than one negative marker is present, e.g. "(-100.00)", the
    markers are treated as saying the same thing and the result is negative
    rather than being negated twice.

    Args:
        amount_str: Amount string (e.g., "$1,234.56", "($100.00)", "$50.00-")

    Returns:
        Float amount, or None if the input is empty, is not a string, does
        not parse as a number, or parses to NaN/infinity. Every failure
        except empty input logs a warning.
    """
    if not amount_str:
        return None

    try:
        # Stripping happens inside the try so that a non-string input is
        # reported as a parse failure instead of raising AttributeError.
        text = amount_str.strip()
        is_negative = False

        # Parentheses indicate negative: ($100.00)
        if text.startswith("(") and text.endswith(")"):
            is_negative = True
            text = text[1:-1]
        # Trailing minus indicates credit (negative): $100.00-
        elif text.endswith("-"):
            is_negative = True
            text = text[:-1]
        # Leading minus: -$100.00
        elif text.startswith("-"):
            is_negative = True
            text = text[1:]

        # Remove currency symbol and thousands separators, then parse.
        value = float(text.replace("$", "").replace(",", "").strip())
    except (ValueError, AttributeError) as e:
        # Log the caller's original input, not the partially stripped text.
        logger.warning("Could not parse amount: %s - %s", amount_str, e)
        return None

    # float() accepts "nan" and "inf"; neither is a usable monetary amount.
    if not math.isfinite(value):
        logger.warning("Could not parse amount: %s - non-finite value", amount_str)
        return None

    # -abs() keeps the result negative even if the remaining text carried
    # its own minus sign, avoiding a double negation.
    return -abs(value) if is_negative else value


def parse_currency_value(value_str: str) -> float:
    """Parse a currency value string to float, falling back to 0.0.

    Unlike parse_amount, this does not interpret accounting-style negative
    indicators: text such as "($5.00)" or "5.00-" fails to parse and yields
    0.0. A plain leading minus is still understood by float(), so "-5" gives
    -5.0. Used for parsing values from tables where sign is determined by
    context.

    The input is passed through str() first, so non-string cells (numbers,
    None) are accepted as well.

    Args:
        value_str: Currency string (e.g., "$1,234.56" or "1234.56")

    Returns:
        Float value, or 0.0 if the text is empty, does not parse, or parses
        to NaN/infinity
    """
    try:
        cleaned = str(value_str).replace("$", "").replace(",", "").strip()
        value = float(cleaned) if cleaned else 0.0
    except (ValueError, AttributeError):
        return 0.0

    # float() accepts "nan"/"inf" (and str(float("nan")) is "nan"); treat
    # those as unparseable so callers always get a usable number.
    return value if math.isfinite(value) else 0.0


def extract_currency_from_text(text: str) -> Optional[float]:
    """Extract first currency value found in text.

    A value is a run that starts with a digit, may contain further digits
    and thousands separators, and may have a decimal part; an optional "$"
    in front is skipped. Signs are not interpreted.

    Args:
        text: Text containing a currency value

    Returns:
        Float value, or None if text is empty or contains no digits
    """
    if not text:
        return None

    # The number must start with a digit. Without that requirement a lone
    # comma in ordinary prose ("Balance, $5.00") is taken as the first
    # "value" and hides the real amount.
    match = re.search(r"\$?(\d[\d,]*\.?\d*)", text)
    if match is None:
        return None

    # The captured text holds only digits, commas and at most one dot, so
    # once the commas are removed float() accepts it.
    return float(match.group(1).replace(",", ""))


def format_amount(amount: float) -> str:
    """Format amount as currency string.

    Positive amounts: $100.00
    Negative amounts: $100.00- (trailing minus for credits)

    The credit marker is decided after rounding to cents, so a negative
    value that rounds to zero (or negative zero) is shown as plain $0.00.

    Args:
        amount: Numeric amount

    Returns:
        Formatted currency string with thousands separators and two decimals
    """
    # Format the magnitude first; the sign is carried by the trailing minus.
    magnitude = f"{abs(amount):,.2f}"

    # Only mark as a credit when something non-zero is actually displayed.
    if amount < 0 and magnitude != "0.00":
        return f"${magnitude}-"
    return f"${magnitude}"


def format_date_for_statement(iso_date: str) -> str:
    """Render an ISO date (YYYY-MM-DD) in the MM/DD/YYYY statement layout.

    Args:
        iso_date: Date text in YYYY-MM-DD form

    Returns:
        The same calendar date written as MM/DD/YYYY

    Raises:
        ValueError: If iso_date does not match the YYYY-MM-DD layout
    """
    return datetime.strptime(iso_date, "%Y-%m-%d").strftime("%m/%d/%Y")


def extract_statement_year(raw_text: str) -> Optional[int]:
    """Extract the statement year from the statement period text.

    Looks for a pattern like "Statement period: MM/DD/YY - MM/DD/YY" and
    returns the year of the first date. The label is matched
    case-insensitively, and the year must be exactly two or four digits.

    Two-digit years below 50 are read as 20YY, the rest as 19YY.
    NOTE(review): strptime's %y, used by normalize_date, switches century
    at 69 rather than 50; confirm whether the two should agree.

    Args:
        raw_text: Raw text from statement

    Returns:
        Year as integer or None if not found
    """
    if not raw_text:
        return None

    # (?!\d) stops a longer digit run from being cut down to a plausible
    # looking year, e.g. a three-digit "202".
    match = re.search(
        r"Statement\s+period[:\s]+\d{1,2}/\d{1,2}/(\d{4}|\d{2})(?!\d)",
        raw_text,
        re.IGNORECASE,
    )
    if match is None:
        return None

    year = int(match.group(1))
    # Expand two-digit years to a full year.
    if year < 100:
        year = 2000 + year if year < 50 else 1900 + year
    return year
