"""Unit tests for ExactMatchStrategy.

Tests validate that the strategy correctly groups transactions by identical description.
"""

import pandas as pd
import pytest

from statement_processor.analytics.strategies.exact_match import ExactMatchStrategy


class TestExactMatchStrategy:
    """Unit tests for ExactMatchStrategy.

    Each test builds a small transaction DataFrame with ``date``,
    ``description`` and ``amount`` columns, runs ``ExactMatchStrategy.cluster``
    on it, and inspects the returned clusters (``indices``, ``memberships``,
    ``label`` and ``metadata``).

    **Validates: Requirements 4.2, 4.3**
    """

    def test_groups_by_identical_description(self):
        """Transactions with identical descriptions SHALL be grouped together."""
        strategy = ExactMatchStrategy()
        df = pd.DataFrame({
            "date": ["2024-01-01", "2024-01-15", "2024-02-01", "2024-02-15"],
            "description": ["Walmart", "Target", "Walmart", "Target"],
            "amount": [50.0, 30.0, 75.0, 45.0],
        })

        clusters = strategy.cluster(df)

        assert len(clusters) == 2

        # Look clusters up by vendor name so the test does not depend on the
        # order in which the strategy returns them.
        by_vendor = {c.metadata.get("vendor_name"): c for c in clusters}
        walmart_cluster = by_vendor.get("Walmart")
        target_cluster = by_vendor.get("Target")

        assert walmart_cluster is not None
        assert target_cluster is not None

        # Walmart transactions are at indices 0 and 2
        assert set(walmart_cluster.indices) == {0, 2}
        # Target transactions are at indices 1 and 3
        assert set(target_cluster.indices) == {1, 3}

    def test_all_matches_have_membership_one(self):
        """All exact matches SHALL have membership score of 1.0."""
        strategy = ExactMatchStrategy()
        df = pd.DataFrame({
            "date": ["2024-01-01", "2024-01-15", "2024-02-01"],
            "description": ["Amazon", "Amazon", "Amazon"],
            "amount": [100.0, 200.0, 150.0],
        })

        clusters = strategy.cluster(df)

        assert len(clusters) == 1
        memberships = clusters[0].memberships
        # Guard against a vacuous pass: the loop below asserts nothing when
        # the membership mapping is empty.
        assert len(memberships) == 3
        for idx, score in memberships.items():
            assert score == 1.0, f"index {idx} has membership {score}"

    def test_empty_dataframe_returns_empty_list(self):
        """Empty DataFrame SHALL return empty list."""
        strategy = ExactMatchStrategy()
        empty_df = pd.DataFrame({"date": [], "description": [], "amount": []})

        clusters = strategy.cluster(empty_df)

        assert clusters == []

    def test_cluster_label_starts_with_vendor(self):
        """All clusters SHALL have label starting with 'vendor:'."""
        strategy = ExactMatchStrategy()
        df = pd.DataFrame({
            "date": ["2024-01-01", "2024-01-02"],
            "description": ["Store A", "Store B"],
            "amount": [10.0, 20.0],
        })

        clusters = strategy.cluster(df)

        # Two distinct descriptions -> two clusters. Without this check the
        # loop below would pass trivially on an empty result.
        assert len(clusters) == 2
        for cluster in clusters:
            assert cluster.label.startswith("vendor:")
            # Label should include vendor name for unique identification
            vendor_name = cluster.metadata["vendor_name"]
            assert cluster.label == f"vendor:{vendor_name}"

    def test_metadata_contains_vendor_name(self):
        """Cluster metadata SHALL contain vendor_name."""
        strategy = ExactMatchStrategy()
        df = pd.DataFrame({
            "date": ["2024-01-01"],
            "description": ["Netflix"],
            "amount": [15.99],
        })

        clusters = strategy.cluster(df)

        assert len(clusters) == 1
        metadata = clusters[0].metadata
        assert metadata["vendor_name"] == "Netflix"
        assert metadata["match_strategy"] == "exact"

    def test_strategy_name(self):
        """Strategy name SHALL be 'exact_match'."""
        strategy = ExactMatchStrategy()
        assert strategy.name == "exact_match"

    def test_single_transaction_creates_single_cluster(self):
        """Single transaction SHALL create a cluster with one member."""
        strategy = ExactMatchStrategy()
        df = pd.DataFrame({
            "date": ["2024-01-01"],
            "description": ["One-time Purchase"],
            "amount": [99.99],
        })

        clusters = strategy.cluster(df)

        assert len(clusters) == 1
        only_cluster = clusters[0]
        assert len(only_cluster.indices) == 1
        # The sole member must be the DataFrame's only row (index 0).
        assert set(only_cluster.indices) == {0}
        assert only_cluster.memberships[0] == 1.0

    def test_unique_descriptions_create_separate_clusters(self):
        """Each unique description SHALL create its own cluster."""
        strategy = ExactMatchStrategy()
        df = pd.DataFrame({
            "date": ["2024-01-01", "2024-01-02", "2024-01-03"],
            "description": ["Vendor A", "Vendor B", "Vendor C"],
            "amount": [10.0, 20.0, 30.0],
        })

        clusters = strategy.cluster(df)

        assert len(clusters) == 3
        vendor_names = {c.metadata["vendor_name"] for c in clusters}
        assert vendor_names == {"Vendor A", "Vendor B", "Vendor C"}
