"""Tests for the Recurrence Analyzer module."""

import string
from datetime import datetime
from typing import List

import pandas as pd
import pytest
from hypothesis import given, settings, strategies as st, assume

from src.recurrence_analyzer import RecurrenceAnalyzer
from src.models import Transaction


# Hypothesis strategies shared by the property-based tests in this module.


def _is_trimmed_and_nonblank(text: str) -> bool:
    """Return True when *text* is non-blank and has no surrounding whitespace."""
    stripped = text.strip()
    return bool(stripped) and text == stripped


def _round_to_cents(value: float) -> float:
    """Round *value* to two decimal places."""
    return round(value, 2)


valid_years = st.integers(min_value=2020, max_value=2025)
valid_months = st.integers(min_value=1, max_value=12)
# 28 is the largest day number that exists in every calendar month.
valid_days = st.integers(min_value=1, max_value=28)

# Descriptions: 3-30 characters from a restricted alphabet, with no leading
# or trailing whitespace.
valid_descriptions = st.text(
    alphabet="".join((string.ascii_letters, string.digits, " -_.&'")),
    min_size=3,
    max_size=30,
).filter(_is_trimmed_and_nonblank)

# Amounts: finite positive charges between 1.00 and 10000.00, rounded to cents.
valid_amounts = st.floats(
    min_value=1.0,
    max_value=10000.0,
    allow_nan=False,
    allow_infinity=False,
).map(_round_to_cents)


def make_iso_date(year: int, month: int, day: int) -> str:
    """Format integer date components as a zero-padded ``YYYY-MM-DD`` string.

    No calendar validation is performed; the components are only formatted.
    """
    parts = (f"{year:04d}", f"{month:02d}", f"{day:02d}")
    return "-".join(parts)


def create_transaction_df(transactions: List[dict]) -> pd.DataFrame:
    """Build a transaction DataFrame with derived ``month`` and ``year`` columns.

    Args:
        transactions: Row dictionaries; each must contain a ``date`` value in
            ``YYYY-MM-DD`` form. Dates that fail to parse are coerced to NaT,
            which leaves missing values in ``month`` and ``year``.

    Returns:
        A DataFrame holding the input columns plus ``month`` and ``year``.
        For an empty input, an empty frame with the standard six columns.
    """
    if not transactions:
        empty_columns = ['date', 'description', 'amount', 'source_file', 'month', 'year']
        return pd.DataFrame(columns=empty_columns)

    frame = pd.DataFrame(transactions)
    # Parse into a scratch column that is dropped again before returning.
    frame['date_parsed'] = pd.to_datetime(frame['date'], format='%Y-%m-%d', errors='coerce')
    parsed = frame['date_parsed']
    return frame.assign(month=parsed.dt.month, year=parsed.dt.year).drop(columns=['date_parsed'])


# **Feature: credit-card-statement-processor, Property 5: Monthly Recurring Detection**
# **Validates: Requirements 3.2**
class TestMonthlyRecurringDetection:
    """Property-based tests for monthly recurring detection."""

    @given(
        base_amount=valid_amounts,
        description=valid_descriptions,
        start_year=st.integers(min_value=2020, max_value=2023),
        num_months=st.integers(min_value=3, max_value=12),
        day=valid_days,
    )
    @settings(max_examples=100)
    def test_monthly_recurring_detection(
        self, base_amount: float, description: str, start_year: int, num_months: int, day: int
    ):
        """Property 5: Monthly Recurring Detection.

        For any set of transactions where a description appears in 3 or more
        different months with amounts within tolerance, the analyzer SHALL
        classify all matching transactions as "monthly" recurring.
        """
        # One transaction per consecutive month, starting in January of
        # start_year, all sharing the same description and amount.
        transactions = []
        month = 1
        year = start_year

        for _ in range(num_months):
            transactions.append({
                'date': make_iso_date(year, month, day),
                'description': description,
                'amount': base_amount,  # Same amount for all
                'source_file': 'test.pdf'
            })
            month += 1
            if month > 12:
                month = 1
                year += 1

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=0.10)

        result = analyzer.analyze(df)

        # All transactions with this description should be classified as monthly
        matching = result[result['description'] == description]
        assert len(matching) == num_months
        assert all(matching['category'] == 'monthly'), (
            f"Expected all transactions to be 'monthly', got: {matching['category'].tolist()}"
        )

    @given(
        base_amount=valid_amounts,
        description=valid_descriptions,
        start_year=st.integers(min_value=2020, max_value=2023),
        num_months=st.integers(min_value=3, max_value=12),
        day=valid_days,
        tolerance=st.floats(min_value=0.05, max_value=0.20),
        variation_factors=st.lists(
            st.floats(min_value=-0.4, max_value=0.4),
            min_size=12,
            max_size=12
        ),
    )
    @settings(max_examples=100)
    def test_monthly_with_amount_variation_within_tolerance(
        self, base_amount: float, description: str, start_year: int,
        num_months: int, day: int, tolerance: float, variation_factors: List[float]
    ):
        """Property 5: Monthly detection with amount variation within tolerance.

        Transactions with amounts varying within tolerance should still be
        classified as monthly recurring.
        """
        transactions = []
        month = 1
        year = start_year

        for i in range(num_months):
            # Each factor lies in [-0.4, 0.4] and is scaled by tolerance / 3,
            # so before rounding an amount deviates from base_amount by at
            # most 0.4 * tolerance / 3 of base_amount.
            variation = base_amount * (tolerance / 3) * variation_factors[i]
            amount = base_amount + variation

            transactions.append({
                'date': make_iso_date(year, month, day),
                'description': description,
                'amount': round(amount, 2),
                'source_file': 'test.pdf'
            })
            month += 1
            if month > 12:
                month = 1
                year += 1

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=tolerance)

        result = analyzer.analyze(df)

        # All transactions should be classified as monthly
        matching = result[result['description'] == description]
        # Guard against a vacuous pass: all() over an empty selection is True.
        assert len(matching) == num_months
        assert all(matching['category'] == 'monthly'), (
            f"Expected 'monthly' with tolerance {tolerance}, got: {matching['category'].tolist()}"
        )


# **Feature: credit-card-statement-processor, Property 6: Yearly Recurring Detection**
# **Validates: Requirements 3.3, 5.3**
class TestYearlyRecurringDetection:
    """Property-based tests for yearly recurring detection."""

    @given(
        base_amount=valid_amounts,
        description=valid_descriptions,
        start_year=st.integers(min_value=2020, max_value=2022),
        month=valid_months,
        day=valid_days,
        num_years=st.integers(min_value=2, max_value=4),
    )
    @settings(max_examples=100)
    def test_yearly_recurring_detection(
        self, base_amount: float, description: str, start_year: int,
        month: int, day: int, num_years: int
    ):
        """Property 6: Yearly Recurring Detection.

        For any set of transactions spanning multiple years where a description
        appears in the same calendar month across 2 or more years with amounts
        within tolerance, the analyzer SHALL classify those transactions as
        "yearly" recurring.
        """
        # One transaction per consecutive year, always in the same month/day
        transactions = [
            {
                'date': make_iso_date(start_year + year_offset, month, day),
                'description': description,
                'amount': base_amount,
                'source_file': 'test.pdf'
            }
            for year_offset in range(num_years)
        ]

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=0.10)

        result = analyzer.analyze(df)

        # All transactions should be classified as yearly
        matching = result[result['description'] == description]
        assert len(matching) == num_years
        assert all(matching['category'] == 'yearly'), (
            f"Expected all transactions to be 'yearly', got: {matching['category'].tolist()}"
        )

    @given(
        base_amount=valid_amounts,
        description=valid_descriptions,
        start_year=st.integers(min_value=2020, max_value=2022),
        month=valid_months,
        day=valid_days,
        num_years=st.integers(min_value=2, max_value=4),
        tolerance=st.floats(min_value=0.05, max_value=0.20),
        variation_factors=st.lists(
            st.floats(min_value=-0.4, max_value=0.4),
            min_size=4,
            max_size=4
        ),
    )
    @settings(max_examples=100)
    def test_yearly_with_amount_variation_within_tolerance(
        self, base_amount: float, description: str, start_year: int,
        month: int, day: int, num_years: int, tolerance: float, variation_factors: List[float]
    ):
        """Property 6: Yearly detection with amount variation within tolerance.

        Transactions with amounts varying within tolerance should still be
        classified as yearly recurring.
        """
        transactions = []

        for year_offset in range(num_years):
            # Each factor lies in [-0.4, 0.4] and is scaled by tolerance / 3,
            # so before rounding an amount deviates from base_amount by at
            # most 0.4 * tolerance / 3 of base_amount.
            variation = base_amount * (tolerance / 3) * variation_factors[year_offset]
            amount = base_amount + variation

            transactions.append({
                'date': make_iso_date(start_year + year_offset, month, day),
                'description': description,
                'amount': round(amount, 2),
                'source_file': 'test.pdf'
            })

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=tolerance)

        result = analyzer.analyze(df)

        # All transactions should be classified as yearly
        matching = result[result['description'] == description]
        # Guard against a vacuous pass: all() over an empty selection is True.
        assert len(matching) == num_years
        assert all(matching['category'] == 'yearly'), (
            f"Expected 'yearly' with tolerance {tolerance}, got: {matching['category'].tolist()}"
        )


# **Feature: credit-card-statement-processor, Property 7: One-Time Classification Completeness**
# **Validates: Requirements 3.4**
class TestOneTimeClassification:
    """Property-based tests for one-time classification completeness."""

    @given(
        amounts=st.lists(valid_amounts, min_size=1, max_size=10),
        descriptions=st.lists(valid_descriptions, min_size=1, max_size=10, unique=True),
        year=valid_years,
        month=valid_months,
        day=valid_days,
    )
    @settings(max_examples=100)
    def test_one_time_classification_completeness(
        self, amounts: List[float], descriptions: List[str], year: int, month: int, day: int
    ):
        """Property 7: One-Time Classification Completeness.

        For any transaction that is not classified as monthly or yearly recurring,
        the analyzer SHALL classify it as "one-time", ensuring every transaction
        has exactly one category.
        """
        # Each description is used at most once. zip() stops at the shorter of
        # the two lists, so surplus descriptions or amounts are simply unused.
        transactions = []
        for i, (desc, amount) in enumerate(zip(descriptions, amounts)):
            transactions.append({
                # Clamp so that day + i never exceeds 28
                'date': make_iso_date(year, month, min(day + i, 28)),
                'description': desc,
                'amount': amount,
                'source_file': 'test.pdf'
            })

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=0.10)

        result = analyzer.analyze(df)

        # Every transaction should have exactly one category
        assert 'category' in result.columns
        assert len(result) == len(transactions)

        # All categories should be valid
        valid_categories = {'monthly', 'yearly', 'one-time'}
        assert all(cat in valid_categories for cat in result['category']), (
            f"Invalid categories found: {result['category'].unique()}"
        )

        # Single-occurrence transactions should be one-time. Descriptions that
        # were dropped by zip() match no rows and are skipped by the guard.
        for desc in descriptions:
            matching = result[result['description'] == desc]
            if len(matching) == 1:
                assert matching['category'].iloc[0] == 'one-time', (
                    f"Single occurrence '{desc}' should be 'one-time', got: {matching['category'].iloc[0]}"
                )

    @given(
        description=valid_descriptions,
        amount=valid_amounts,
        year=valid_years,
        day=valid_days,
    )
    @settings(max_examples=100)
    def test_two_occurrences_not_monthly(
        self, description: str, amount: float, year: int, day: int
    ):
        """Property 7: Two occurrences should not be classified as monthly.

        A description appearing in only 2 months should be classified as one-time,
        not monthly (which requires 3+ months).
        """
        # Exactly two transactions: January and February of the same year
        transactions = [
            {
                'date': make_iso_date(year, occurrence_month, day),
                'description': description,
                'amount': amount,
                'source_file': 'test.pdf'
            }
            for occurrence_month in (1, 2)
        ]

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=0.10)

        result = analyzer.analyze(df)

        # Should be one-time, not monthly
        matching = result[result['description'] == description]
        # Guard against a vacuous pass: all() over an empty selection is True.
        assert len(matching) == len(transactions)
        assert all(matching['category'] == 'one-time'), (
            f"2 occurrences should be 'one-time', got: {matching['category'].tolist()}"
        )


# **Feature: credit-card-statement-processor, Property 8: Amount Tolerance Matching**
# **Validates: Requirements 3.5**
class TestAmountToleranceMatching:
    """Property-based tests for amount tolerance matching."""

    @given(
        amount1=valid_amounts,
        tolerance=st.floats(min_value=0.01, max_value=0.50),
        within_tolerance=st.booleans(),
    )
    @settings(max_examples=100)
    def test_amount_tolerance_matching(
        self, amount1: float, tolerance: float, within_tolerance: bool
    ):
        """Property 8: Amount Tolerance Matching.

        For any two amounts where |amount1 - amount2| / max(|amount1|, |amount2|) <= tolerance_threshold,
        the amounts_match function SHALL return True; otherwise it SHALL return False.
        """
        analyzer = RecurrenceAnalyzer(amount_tolerance=tolerance)

        # Target a relative difference of 90% of the tolerance (clearly inside)
        # or 150% of it (clearly outside). For amount2 = amount1 / (1 - d) the
        # max-relative difference (amount2 - amount1) / amount2 equals d, so
        # the target holds for every tolerance in the generated range. Adding
        # amount1 * 1.5 * tolerance instead would give 1.5t / (1 + 1.5t), which
        # is no longer outside the tolerance once tolerance >= 1/3.
        target_diff = tolerance * (0.9 if within_tolerance else 1.5)
        amount2 = amount1 / (1 - target_diff)

        result = analyzer.amounts_match(amount1, amount2)

        # Recompute the expected outcome from the documented formula.
        # amount1 >= 1.0, so the denominator is always positive.
        max_abs = max(abs(amount1), abs(amount2))
        actual_diff = abs(amount1 - amount2) / max_abs
        expected = actual_diff <= tolerance

        # Sanity check on the generated input: it must land on the intended
        # side of the tolerance, otherwise one branch is never exercised.
        assert expected == within_tolerance, (
            f"Generated pair ({amount1}, {amount2}) is on the wrong side of "
            f"tolerance {tolerance} (diff={actual_diff})"
        )
        assert result == expected, (
            f"amounts_match({amount1}, {amount2}) = {result}, "
            f"expected {expected} (diff={actual_diff}, tolerance={tolerance})"
        )

    @given(
        amount=valid_amounts,
        tolerance=st.floats(min_value=0.01, max_value=0.50),
    )
    @settings(max_examples=100)
    def test_same_amounts_always_match(self, amount: float, tolerance: float):
        """Property 8: Same amounts should always match regardless of tolerance."""
        analyzer = RecurrenceAnalyzer(amount_tolerance=tolerance)

        assert analyzer.amounts_match(amount, amount) is True, (
            f"Same amounts should always match: {amount}"
        )

    @given(tolerance=st.floats(min_value=0.01, max_value=0.50))
    @settings(max_examples=100)
    def test_zero_amounts_match(self, tolerance: float):
        """Property 8: Zero amounts should match each other."""
        analyzer = RecurrenceAnalyzer(amount_tolerance=tolerance)

        assert analyzer.amounts_match(0.0, 0.0) is True, (
            "Zero amounts should match"
        )

    @given(
        amount1=valid_amounts,
        tolerance=st.floats(min_value=0.01, max_value=0.50),
    )
    @settings(max_examples=100)
    def test_boundary_tolerance(self, amount1: float, tolerance: float):
        """Property 8: An amount `tolerance` above amount1 should match.

        amount2 is amount1 * (1 + tolerance), i.e. at the boundary when the
        difference is measured relative to amount1. Measured relative to the
        larger amount (the formula checked in test_amount_tolerance_matching)
        the difference is tolerance / (1 + tolerance), which is below the
        tolerance, so a match is expected either way.
        """
        analyzer = RecurrenceAnalyzer(amount_tolerance=tolerance)

        # amount2 exceeds amount1 by the tolerance fraction of amount1
        amount2 = amount1 * (1 + tolerance)

        result = analyzer.amounts_match(amount1, amount2)

        # Max-relative difference, reported in the failure message
        max_abs = max(abs(amount1), abs(amount2))
        actual_diff = abs(amount1 - amount2) / max_abs

        assert result is True, (
            f"Boundary amounts should match: diff={actual_diff}, tolerance={tolerance}"
        )


class TestRecurrenceAnalyzerEdgeCases:
    """Example-based tests covering edge-case inputs to the analyzer."""

    @pytest.fixture
    def analyzer(self):
        """Provide a RecurrenceAnalyzer configured with a 10% amount tolerance."""
        return RecurrenceAnalyzer(amount_tolerance=0.10)

    def test_empty_dataframe(self, analyzer):
        """An empty input frame yields an empty result."""
        column_names = ['date', 'description', 'amount', 'source_file']
        empty_frame = pd.DataFrame(columns=column_names)

        analyzed = analyzer.analyze(empty_frame)

        assert len(analyzed) == 0

    def test_single_transaction(self, analyzer):
        """A lone transaction is categorized as one-time."""
        only_row = {
            'date': '2024-01-15',
            'description': 'SINGLE PURCHASE',
            'amount': 100.00,
            'source_file': 'test.pdf'
        }

        analyzed = analyzer.analyze(create_transaction_df([only_row]))

        assert analyzed['category'].iloc[0] == 'one-time'

    def test_monthly_takes_precedence_over_yearly(self, analyzer):
        """Monthly wins when a charge qualifies as both monthly and yearly."""
        # January-April in both 2023 and 2024: the charge recurs month to
        # month and also in the same calendar months across two years.
        rows = [
            {
                'date': make_iso_date(year, month, 15),
                'description': 'SUBSCRIPTION SERVICE',
                'amount': 50.00,
                'source_file': 'test.pdf'
            }
            for year in (2023, 2024)
            for month in (1, 2, 3, 4)
        ]

        analyzed = analyzer.analyze(create_transaction_df(rows))

        # Every row is expected to carry the monthly category
        is_monthly = analyzed['category'] == 'monthly'
        assert is_monthly.all()


class TestRecurrenceAnalyzerBugFixes:
    """Tests for specific bug fixes."""

    def test_frequent_transactions_not_yearly(self):
        """Test that transactions occurring 14 times in 2 years are monthly, not yearly.

        Bug: A subscription like "ASCENSION MIXED MARTIAL" that occurs 14 times
        over 2 years was being classified as yearly instead of monthly.
        """
        # 14 consecutive monthly charges: Jan 2024 through Feb 2025
        transactions = []
        year = 2024
        month = 1
        for _ in range(14):
            transactions.append({
                'date': make_iso_date(year, month, 15),
                'description': 'ASCENSION MIXED MARTIAL 305-440-8203 GA',
                'amount': 129.14,
                'source_file': 'test.pdf'
            })
            month += 1
            if month > 12:
                month = 1
                year += 1

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=0.10)

        result = analyzer.analyze(df)

        # Should be monthly, not yearly
        assert all(result['category'] == 'monthly'), (
            f"14 transactions over 2 years should be monthly, got: {result['category'].unique()}"
        )

    def test_yearly_requires_low_frequency(self):
        """Test that yearly classification requires roughly 1 transaction per year.

        A yearly subscription should have approximately 1 transaction per year,
        not multiple transactions per month.
        """
        # Create a true yearly subscription (1 per year, same month)
        transactions = [
            {'date': '2024-03-15', 'description': 'ANNUAL SUBSCRIPTION', 'amount': 99.00, 'source_file': 'test.pdf'},
            {'date': '2025-03-15', 'description': 'ANNUAL SUBSCRIPTION', 'amount': 99.00, 'source_file': 'test.pdf'},
        ]

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=0.10)

        result = analyzer.analyze(df)

        # Should be yearly
        assert all(result['category'] == 'yearly'), (
            f"2 transactions in same month across 2 years should be yearly, got: {result['category'].unique()}"
        )

    def test_monthly_with_varying_amounts(self):
        """Test monthly detection with amounts that vary but are within tolerance."""
        base_amount = 129.14

        # Six consecutive months (Jan-Jun 2024); the amount rises by 2 each
        # month: 129.14, 131.14, ..., 139.14. The largest gap is 10 against a
        # maximum of 139.14, roughly 7.2%, which is inside the 10% tolerance.
        transactions = [
            {
                'date': make_iso_date(2024, i + 1, 15),
                'description': 'VARYING SUBSCRIPTION',
                'amount': base_amount + (i * 2),
                'source_file': 'test.pdf'
            }
            for i in range(6)
        ]

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=0.10)

        result = analyzer.analyze(df)

        # Assert the documented expectation instead of only printing the
        # outcome, so the test can actually fail on a regression.
        assert len(result) == len(transactions)
        assert all(result['category'] == 'monthly'), (
            f"Amounts within tolerance should be monthly, got categories "
            f"{result['category'].tolist()} for amounts {result['amount'].tolist()}"
        )

    def test_subscription_with_adhoc_purchases(self):
        """Test vendor with both monthly subscription and ad-hoc purchases.

        A gym membership at $129/mo plus occasional merch purchases should
        still be detected as monthly recurring.
        """
        # Monthly subscription at $129.14, one charge per month of 2024
        transactions = [
            {
                'date': make_iso_date(2024, month, 15),
                'description': 'ASCENSION MIXED MARTIAL',
                'amount': 129.14,
                'source_file': 'test.pdf'
            }
            for month in range(1, 13)
        ]

        # Ad-hoc purchases at varying amounts
        transactions.extend([
            {'date': '2024-03-20', 'description': 'ASCENSION MIXED MARTIAL', 'amount': 25.00, 'source_file': 'test.pdf'},
            {'date': '2024-07-10', 'description': 'ASCENSION MIXED MARTIAL', 'amount': 45.00, 'source_file': 'test.pdf'},
            {'date': '2024-11-05', 'description': 'ASCENSION MIXED MARTIAL', 'amount': 35.00, 'source_file': 'test.pdf'},
        ])

        df = create_transaction_df(transactions)
        analyzer = RecurrenceAnalyzer(amount_tolerance=0.10)

        result = analyzer.analyze(df)

        # Should be monthly (the $129 subscription is detected)
        assert all(result['category'] == 'monthly'), (
            f"Vendor with subscription + ad-hoc should be monthly, got: {result['category'].unique()}"
        )
