"""Tests for RegexVendorStrategy."""

import pandas as pd
import pytest

from statement_processor.analytics.strategies.regex_vendor import RegexVendorStrategy, VendorPattern


class TestVendorPattern:
    """Construction-time checks for the VendorPattern dataclass."""

    def test_valid_pattern(self) -> None:
        """A well-formed regex is stored unchanged alongside its vendor name."""
        regex = r"Audible\*.*"
        vendor = "Audible"

        entry = VendorPattern(pattern=regex, vendor_name=vendor)

        assert entry.pattern == regex
        assert entry.vendor_name == vendor

    def test_invalid_pattern_raises(self) -> None:
        """A regex that cannot compile is rejected with ValueError."""
        # The unterminated character class makes this pattern uncompilable.
        broken_regex = r"[invalid"
        expected_error = pytest.raises(ValueError, match="Invalid regex pattern")

        with expected_error:
            VendorPattern(pattern=broken_regex, vendor_name="Test")


class TestRegexVendorStrategy:
    """Behavioural tests for RegexVendorStrategy clustering."""

    def test_name(self) -> None:
        """The strategy identifies itself as 'regex_vendor'."""
        assert RegexVendorStrategy().name == "regex_vendor"

    def test_empty_transactions(self) -> None:
        """Clustering a DataFrame with no rows or columns yields no clusters."""
        matcher = RegexVendorStrategy()
        matcher.add_pattern(r"Audible\*.*", "Audible")

        no_rows = pd.DataFrame()

        assert matcher.cluster(no_rows) == []

    def test_no_patterns(self) -> None:
        """A strategy with no registered patterns yields no clusters."""
        matcher = RegexVendorStrategy()
        single_row = {
            "date": ["2024-01-01"],
            "description": ["Test"],
            "amount": [10.0],
        }

        outcome = matcher.cluster(pd.DataFrame(single_row))

        assert outcome == []

    def test_audible_pattern_matches(self) -> None:
        """Three differently-coded Audible charges land in one Audible cluster."""
        matcher = RegexVendorStrategy()
        matcher.add_pattern(r"Audible\*.*", "Audible")

        descriptions = [
            "Audible*501CY3Y03 Amzn.com/bill NJ",
            "Audible*CY7IP98G3 Amzn.com/bill NJ",
            "Audible*FO5G89N83 Amzn.com/bill NJ",
        ]
        transactions = pd.DataFrame(
            {
                "date": ["2024-01-15", "2024-02-15", "2024-03-15"],
                "description": descriptions,
                "amount": [14.95, 14.95, 14.95],
            }
        )

        found = matcher.cluster(transactions)

        assert len(found) == 1
        audible = found[0]
        assert audible.label == "vendor:Audible"
        assert audible.metadata["vendor_name"] == "Audible"
        assert audible.metadata["match_strategy"] == "regex"
        assert len(audible.memberships) == 3
        # Every membership score is expected to be exactly 1.0.
        for weight in audible.memberships.values():
            assert weight == 1.0

    def test_case_insensitive_by_default(self) -> None:
        """A lowercase pattern matches descriptions regardless of their casing."""
        matcher = RegexVendorStrategy()
        matcher.add_pattern(r"netflix\.com.*", "Netflix")

        mixed_case_descriptions = ["NETFLIX.COM SUBSCRIPTION", "Netflix.com Monthly"]
        transactions = pd.DataFrame(
            {
                "date": ["2024-01-01", "2024-02-01"],
                "description": mixed_case_descriptions,
                "amount": [15.99, 15.99],
            }
        )

        found = matcher.cluster(transactions)

        assert len(found) == 1
        assert len(found[0].memberships) == 2

    def test_multiple_patterns(self) -> None:
        """Each registered pattern with matches produces its own cluster."""
        matcher = RegexVendorStrategy()
        matcher.add_pattern(r"Audible\*.*", "Audible")
        matcher.add_pattern(r"SPOTIFY.*", "Spotify")

        descriptions = [
            "Audible*501CY3Y03 Amzn.com/bill NJ",
            "SPOTIFY USA",
            "Audible*CY7IP98G3 Amzn.com/bill NJ",
        ]
        transactions = pd.DataFrame(
            {
                "date": ["2024-01-15", "2024-01-20", "2024-02-15"],
                "description": descriptions,
                "amount": [14.95, 9.99, 14.95],
            }
        )

        found = matcher.cluster(transactions)

        assert len(found) == 2
        # Index the clusters by vendor so each can be inspected by name.
        by_vendor = {group.metadata["vendor_name"]: group for group in found}
        assert set(by_vendor) == {"Audible", "Spotify"}

        # Two of the three rows are Audible charges.
        assert len(by_vendor["Audible"].memberships) == 2

        # The remaining row is the single Spotify charge.
        assert len(by_vendor["Spotify"].memberships) == 1

    def test_no_matches(self) -> None:
        """A description that matches no pattern is left out of every cluster."""
        matcher = RegexVendorStrategy()
        matcher.add_pattern(r"Audible\*.*", "Audible")

        unrelated = pd.DataFrame(
            {
                "date": ["2024-01-01"],
                "description": ["WALMART STORE #1234"],
                "amount": [50.00],
            }
        )

        assert matcher.cluster(unrelated) == []

    def test_first_pattern_wins(self) -> None:
        """When several patterns match, the earliest registered one claims the row."""
        matcher = RegexVendorStrategy()
        # Registration order matters: the broad pattern is added before the
        # more specific one, and both match the description below.
        matcher.add_pattern(r"AMAZON.*", "Amazon")
        matcher.add_pattern(r"AMAZON PRIME.*", "Amazon Prime")

        transactions = pd.DataFrame(
            {
                "date": ["2024-01-01"],
                "description": ["AMAZON PRIME VIDEO"],
                "amount": [8.99],
            }
        )

        found = matcher.cluster(transactions)

        assert len(found) == 1
        winner = found[0].metadata["vendor_name"]
        # The broad "AMAZON.*" pattern was registered first, so it wins.
        assert winner == "Amazon"

