"""Recurrence Analyzer module for classifying transactions by recurrence pattern.

NOTE: This is legacy code preserved for future integration into statement_processor.
It would need to be adapted to work as a ClusteringStrategy implementation.
"""

import logging
from collections import defaultdict
from typing import Set, Dict, List, Tuple

import pandas as pd

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class RecurrenceAnalyzer:
    """Analyzes transactions to identify recurring patterns.

    Transactions are grouped by their exact ``description`` value. Each
    description is then classified as monthly recurring, yearly recurring,
    or one-time, based on the months/years in which it occurs and on how
    close the amounts are to each other (relative tolerance).
    """

    def __init__(self, amount_tolerance: float = 0.10):
        """Initialize with amount tolerance for matching (default 10%).

        Args:
            amount_tolerance: Relative tolerance threshold for amount
                matching (0.10 = 10%).
        """
        self._amount_tolerance = amount_tolerance

    @property
    def amount_tolerance(self) -> float:
        """Return the relative amount tolerance given at construction."""
        return self._amount_tolerance

    def analyze(self, df: pd.DataFrame) -> pd.DataFrame:
        """Analyze transactions and add a recurrence classification column.

        The input frame is never modified; a copy is returned.

        Args:
            df: DataFrame with columns: date, description, amount,
                source_file. If both 'month' and 'year' columns are already
                present they are used as-is; otherwise both are (re)derived
                from 'date', which is parsed as ``YYYY-MM-DD``.

        Returns:
            Copy of ``df`` with an added 'category' column containing:
            - "monthly" for monthly recurring transactions
            - "yearly" for yearly recurring transactions
            - "one-time" for non-recurring transactions
            For non-empty input the copy also carries the 'month' and
            'year' columns used for the analysis.
        """
        if df.empty:
            # Keep the documented output schema even when there is nothing
            # to classify, so callers can always rely on 'category'.
            result = df.copy()
            result['category'] = pd.Series(index=result.index, dtype=object)
            return result

        result = df.copy()

        if 'month' not in result.columns or 'year' not in result.columns:
            # Parse into a local Series rather than a temporary column so a
            # caller-supplied 'date_parsed' column is not clobbered/dropped.
            parsed = pd.to_datetime(result['date'], format='%Y-%m-%d', errors='coerce')
            unparsed = int(parsed.isna().sum())
            if unparsed:
                logger.warning(
                    "%d transaction date(s) could not be parsed as YYYY-MM-DD; "
                    "their month/year are left missing",
                    unparsed,
                )
            result['month'] = parsed.dt.month
            result['year'] = parsed.dt.year

        yearly_patterns = self.find_yearly_patterns(result)
        monthly_patterns = self.find_monthly_patterns(result)

        # A description that qualifies as both is reported as monthly.
        pure_yearly_patterns = yearly_patterns - monthly_patterns

        logger.info(
            "Found %d monthly patterns, %d yearly patterns",
            len(monthly_patterns),
            len(pure_yearly_patterns),
        )

        def classify(desc) -> str:
            if desc in monthly_patterns:
                return "monthly"
            if desc in pure_yearly_patterns:
                return "yearly"
            return "one-time"

        # Only the description decides the label, so iterate that column
        # directly instead of building a row object per transaction.
        result['category'] = [classify(desc) for desc in result['description']]

        return result

    def find_monthly_patterns(self, df: pd.DataFrame) -> Set[str]:
        """Identify descriptions that appear monthly.

        A description is considered monthly recurring if there's an amount
        (within tolerance) that appears roughly once per month across 3+
        different calendar months.

        Args:
            df: DataFrame with columns: description, amount, month, year

        Returns:
            Set of description strings that are monthly recurring
        """
        if df.empty:
            return set()

        monthly_patterns: Set[str] = set()

        for desc, group in df.groupby('description'):
            if self._has_monthly_recurring_amount(group):
                monthly_patterns.add(desc)
                logger.debug("Monthly pattern found: %s", desc)

        return monthly_patterns

    def _has_monthly_recurring_amount(self, group: pd.DataFrame) -> bool:
        """Check if a vendor has a monthly recurring amount pattern.

        Args:
            group: Transactions sharing one description, with columns
                amount, month, year.

        Returns:
            True if some cluster of similar amounts has at least 3
            transactions, spans at least 3 distinct calendar-month numbers,
            and has at most 1.5 transactions per distinct (year, month)
            period; False otherwise.
        """
        if len(group) < 3:
            return False

        amount_clusters = self._cluster_amounts(group['amount'].tolist())

        for center_amount, indices in amount_clusters.items():
            if len(indices) < 3:
                continue

            # Cluster indices are positions within ``group``, hence iloc.
            cluster_txns = group.iloc[indices]
            unique_calendar_months = cluster_txns['month'].nunique()

            if unique_calendar_months < 3:
                continue

            unique_periods = len(cluster_txns[['year', 'month']].drop_duplicates())

            # "Roughly once per month": allow some duplicates, but not a
            # vendor that is charged many times within each period.
            if len(indices) <= unique_periods * 1.5:
                logger.debug(
                    "Found monthly recurring: $%.2f in %s calendar months",
                    center_amount,
                    unique_calendar_months,
                )
                return True

        return False

    def _cluster_amounts(self, amounts: List[float]) -> Dict[float, List[int]]:
        """Cluster amounts by tolerance and return indices.

        Each amount joins the first existing cluster whose center it matches
        (see ``amounts_match``); otherwise it starts a new cluster and
        becomes that cluster's center.

        Args:
            amounts: Amounts to cluster, in input order.

        Returns:
            Mapping of cluster center amount -> positions in ``amounts``
            that belong to the cluster.
        """
        clusters: Dict[float, List[int]] = {}

        for i, amount in enumerate(amounts):
            for center, members in clusters.items():
                if self.amounts_match(amount, center):
                    members.append(i)
                    break
            else:
                # No existing center matched: this amount seeds a cluster.
                clusters[amount] = [i]

        return clusters

    def find_yearly_patterns(self, df: pd.DataFrame) -> Set[str]:
        """Identify descriptions that appear yearly.

        A description is considered yearly recurring if it appears in the
        same calendar month across 2+ years with amounts within tolerance.
        Descriptions with more than two transactions per distinct year on
        average are skipped as too frequent to be yearly.

        Args:
            df: DataFrame with columns: description, amount, month, year

        Returns:
            Set of description strings that are yearly recurring
        """
        if df.empty:
            return set()

        yearly_patterns: Set[str] = set()

        for desc, group in df.groupby('description'):
            unique_years = group['year'].nunique()

            if unique_years < 2:
                continue

            total_transactions = len(group)
            if total_transactions > unique_years * 2:
                logger.debug(
                    "Skipping %s for yearly: %s txns over %s years",
                    desc,
                    total_transactions,
                    unique_years,
                )
                continue

            for month, month_group in group.groupby('month'):
                years_in_month = month_group['year'].nunique()

                if years_in_month < 2:
                    continue

                if self._amounts_within_tolerance(month_group['amount'].tolist()):
                    yearly_patterns.add(desc)
                    logger.debug(
                        "Yearly pattern found: %s (month %s, %s years)",
                        desc,
                        month,
                        years_in_month,
                    )
                    # One qualifying month is enough for this description.
                    break

        return yearly_patterns

    def amounts_match(self, amount1: float, amount2: float) -> bool:
        """Check if two amounts are within tolerance threshold.

        The difference is measured relative to the larger absolute value,
        so the check is symmetric in its arguments. Two zero amounts match.

        Args:
            amount1: First amount.
            amount2: Second amount.

        Returns:
            True if ``|amount1 - amount2| / max(|amount1|, |amount2|)`` is
            at most the configured tolerance.
        """
        max_abs = max(abs(amount1), abs(amount2))
        if max_abs == 0:
            # Both amounts are zero; avoid dividing by zero.
            return True

        return abs(amount1 - amount2) / max_abs <= self._amount_tolerance

    def _amounts_within_tolerance(self, amounts: List[float]) -> bool:
        """Check if all amounts in a list are within tolerance of each other.

        Every pair is compared, so the result does not depend on the order
        of ``amounts``. (Comparing only against the first element would let
        two amounts that are each close to the first, but far from one
        another, slip through.)

        Args:
            amounts: Amounts to compare.

        Returns:
            True if every pair of amounts matches; True for fewer than two
            amounts.
        """
        return all(
            self.amounts_match(first, second)
            for i, first in enumerate(amounts)
            for second in amounts[i + 1:]
        )
