"""Data models for the statement processor.

This module contains Pydantic models for transactions, statements,
and processing results.
"""

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field


class Transaction(BaseModel):
    """One financial transaction taken from a statement.

    The model is compatible with the analytics framework, which expects
    ``date``, ``description``, and ``amount`` fields.

    All dates are carried as plain strings; this class declares no
    validator that enforces the documented ISO format.

    Attributes:
        date: Primary date for the transaction (transaction_date or
            posted_date). Required.
        transaction_date: Date the transaction occurred (may be empty);
            defaults to None.
        posted_date: Date the transaction was posted to the account;
            defaults to None.
        description: Merchant/transaction description. Required.
        amount: Transaction amount (negative for credits/payments,
            positive for charges). Required.
        source_file: Original PDF filename; defaults to an empty string.
        category: Recurrence category: "monthly", "yearly", or "one-time";
            defaults to an empty string.
    """

    # --- dates -----------------------------------------------------------
    date: str = Field(
        ...,
        description="Primary date in ISO format (YYYY-MM-DD)",
    )
    transaction_date: Optional[str] = Field(
        None,
        description="Transaction date in ISO format",
    )
    posted_date: Optional[str] = Field(
        None,
        description="Posted date in ISO format",
    )

    # --- what and how much -----------------------------------------------
    description: str = Field(
        ...,
        description="Merchant/transaction description",
    )
    amount: float = Field(
        ...,
        description="Transaction amount (negative for credits, positive for charges)",
    )

    # --- provenance and classification -----------------------------------
    source_file: str = Field(
        "",
        description="Original PDF filename",
    )
    category: str = Field(
        "",
        description="Recurrence category: 'monthly', 'yearly', or 'one-time'",
    )

    def to_dict(self) -> Dict[str, Any]:
        """Return the field values as a plain dict, e.g. for DataFrame rows."""
        field_values: Dict[str, Any] = self.model_dump()
        return field_values


class StatementMetadata(BaseModel):
    """Statement-level information extracted from a financial statement.

    Every field has a default, so a parser can fill in only what its
    statement format provides and leave the rest untouched.

    Attributes:
        account_number_last4: Last 4 digits of account number
            (default: empty string).
        statement_period_start: Start date of statement period
            (default: None).
        statement_period_end: End date of statement period
            (default: None).
        payment_due_date: Payment due date (default: None).
        minimum_payment_due: Minimum payment amount due (default: 0.0).
        institution_name: Name of the financial institution
            (default: empty string).
        account_type: Type of account (credit card, checking, etc.)
            (default: empty string).
        extra: Additional parser-specific metadata (default: a new empty
            dict per instance).
    """

    # --- account identification -------------------------------------------
    account_number_last4: str = Field(
        "",
        description="Last 4 digits of account number",
    )

    # --- dates, carried as strings ------------------------------------------
    statement_period_start: Optional[str] = Field(
        None,
        description="Start date of statement period (YYYY-MM-DD)",
    )
    statement_period_end: Optional[str] = Field(
        None,
        description="End date of statement period (YYYY-MM-DD)",
    )
    payment_due_date: Optional[str] = Field(
        None,
        description="Payment due date (YYYY-MM-DD)",
    )

    # --- amounts ------------------------------------------------------------
    minimum_payment_due: float = Field(
        0.0,
        description="Minimum payment amount due",
    )

    # --- institution / account kind -------------------------------------------
    institution_name: str = Field(
        "",
        description="Name of the financial institution",
    )
    account_type: str = Field(
        "",
        description="Type of account (credit card, checking, etc.)",
    )

    # --- open-ended bag for anything parser-specific -----------------------------
    extra: Dict[str, Any] = Field(
        description="Additional parser-specific metadata",
        default_factory=dict,
    )


class Statement(BaseModel):
    """A fully parsed financial statement.

    Only ``source_file`` is required; ``metadata`` and ``transactions``
    fall back to a default ``StatementMetadata`` and an empty list.

    Attributes:
        source_file: Original PDF filename.
        metadata: Statement metadata (account info, dates, summary).
        transactions: List of transactions from the statement.
    """

    source_file: str = Field(
        ...,
        description="Original PDF filename",
    )
    # Factories are used so each Statement gets its own metadata object
    # and its own list.
    metadata: StatementMetadata = Field(
        description="Statement metadata",
        default_factory=StatementMetadata,
    )
    transactions: List[Transaction] = Field(
        description="List of transactions",
        default_factory=list,
    )

    def to_dict(self) -> Dict[str, Any]:
        """Return the statement as a plain dict for serialization."""
        serialized: Dict[str, Any] = self.model_dump()
        return serialized


class ProcessingResult(BaseModel):
    """Summary of a run that processed financial statements.

    All counters default to zero and both lists default to empty. This
    class declares no validator tying the counters to the list contents.

    Attributes:
        total_files: Total number of PDF files found.
        successful_files: Number of files successfully processed.
        failed_files: Number of files that failed processing.
        total_transactions: Total number of transactions extracted.
        errors: List of error messages encountered.
        statements: List of parsed statements.
    """

    # --- counters -----------------------------------------------------------
    total_files: int = Field(
        0,
        description="Total number of PDF files found",
    )
    successful_files: int = Field(
        0,
        description="Number of files successfully processed",
    )
    failed_files: int = Field(
        0,
        description="Number of files that failed processing",
    )
    total_transactions: int = Field(
        0,
        description="Total number of transactions extracted",
    )

    # --- collected output; factories give each instance its own list ----------
    errors: List[str] = Field(
        description="List of error messages encountered",
        default_factory=list,
    )
    statements: List[Statement] = Field(
        description="List of parsed statements",
        default_factory=list,
    )


class ExtractedDocument(BaseModel):
    """Structured data extracted from a PDF document.

    Contains markdown-formatted text and any extracted tables.

    Attributes:
        markdown: Markdown-formatted text content from the PDF
            (default: empty string).
        tables: List of DataFrames for extracted tables. Typed as
            ``List[Any]``, so the element type is not checked
            (default: empty list).
        table_names: Names/labels for each extracted table
            (default: empty list). This class declares no validator that
            keeps it the same length as ``tables``.
        source_file: Original PDF filename (default: empty string).
    """

    # Pydantic v2 configuration. The rest of this module uses the v2 API
    # (``model_dump``), where a nested ``class Config`` is deprecated and
    # triggers a deprecation warning; ``model_config`` is its replacement.
    model_config = ConfigDict(arbitrary_types_allowed=True)  # Allow pandas DataFrames

    markdown: str = Field(default="", description="Markdown-formatted text content")
    tables: List[Any] = Field(
        default_factory=list, description="List of extracted tables (DataFrames)"
    )
    table_names: List[str] = Field(
        default_factory=list, description="Names/labels for each table"
    )
    source_file: str = Field(default="", description="Original PDF filename")
