"""Tests for semantic search functionality (SIMILAR_TO operator)."""

import pytest

from nlql import NLQL
from nlql.adapters import MemoryAdapter


@pytest.fixture
def adapter_with_ai_content() -> MemoryAdapter:
    """Build a MemoryAdapter seeded with five short, tagged documents.

    Four texts cover AI-related subjects and one is about the weather.
    Every text is stored together with ``topic`` and ``author`` metadata.
    """
    store = MemoryAdapter()

    # (text, metadata) pairs, inserted below in exactly this order.
    documents = [
        (
            "Artificial intelligence and machine learning are transforming technology.",
            {"topic": "AI", "author": "Alice"},
        ),
        (
            "Neural networks are the foundation of deep learning systems.",
            {"topic": "ML", "author": "Bob"},
        ),
        (
            "Natural language processing enables computers to understand human language.",
            {"topic": "NLP", "author": "Alice"},
        ),
        (
            "Computer vision allows machines to interpret visual information.",
            {"topic": "CV", "author": "Charlie"},
        ),
        (
            "The weather is sunny today and perfect for outdoor activities.",
            {"topic": "Weather", "author": "David"},
        ),
    ]

    for text, metadata in documents:
        store.add_text(text, metadata)

    return store


def test_similar_to_basic(adapter_with_ai_content: MemoryAdapter) -> None:
    """Check that a plain SIMILAR_TO filter returns scored hits.

    Every hit must expose a float ``similarity`` entry in its metadata that
    lies within [0, 1] and exceeds the 0.5 threshold used in the query.
    """
    engine = NLQL(adapter=adapter_with_ai_content)
    query = 'SELECT CHUNK WHERE SIMILAR_TO("artificial intelligence") > 0.5'

    hits = engine.execute(query)

    # The query is expected to match at least one stored text.
    assert len(hits) > 0

    for hit in hits:
        meta = hit.metadata
        # Presence is checked first so a missing key fails as an assertion,
        # not as a KeyError on the lookup below.
        assert "similarity" in meta
        score = meta["similarity"]
        assert isinstance(score, float)
        assert 0.0 <= score <= 1.0
        assert score > 0.5


def test_similar_to_with_order_by(adapter_with_ai_content: MemoryAdapter) -> None:
    """Check that ORDER BY SIMILARITY DESC returns hits from best to worst."""
    engine = NLQL(adapter=adapter_with_ai_content)

    hits = engine.execute(
        'SELECT CHUNK WHERE SIMILAR_TO("machine learning") > 0.3 ORDER BY SIMILARITY DESC'
    )

    # There must be something to order.
    assert len(hits) > 0

    # The returned scores must already equal their own descending sort.
    scores = [hit.metadata["similarity"] for hit in hits]
    descending = list(scores)
    descending.sort(reverse=True)
    assert scores == descending

    # The leading hit is at least as similar as the trailing one.
    best, worst = scores[0], scores[-1]
    assert best >= worst


def test_similar_to_with_limit(adapter_with_ai_content: MemoryAdapter) -> None:
    """Check that LIMIT caps the number of SIMILAR_TO hits.

    NOTE: an empty result set satisfies every assertion here, so this test
    only verifies the upper bound and the threshold of whatever is returned.
    """
    engine = NLQL(adapter=adapter_with_ai_content)
    query = 'SELECT CHUNK WHERE SIMILAR_TO("neural networks") > 0.2 LIMIT 2'

    hits = engine.execute(query)

    # LIMIT 2 means no more than two hits may come back.
    assert len(hits) <= 2

    # Each returned hit still has to clear the 0.2 threshold.
    for hit in hits:
        score = hit.metadata["similarity"]
        assert score > 0.2


def test_similar_to_with_metadata_filter(adapter_with_ai_content: MemoryAdapter) -> None:
    """Check SIMILAR_TO combined with a META equality filter via AND.

    NOTE: the assertions run per hit, so an empty result set passes
    vacuously; the test does not require that anything matches.
    """
    engine = NLQL(adapter=adapter_with_ai_content)
    query = 'SELECT CHUNK WHERE SIMILAR_TO("AI technology") > 0.3 AND META("author") == "Alice"'

    hits = engine.execute(query)

    # Every hit has to satisfy the semantic and the metadata condition.
    for hit in hits:
        meta = hit.metadata
        assert meta["similarity"] > 0.3
        assert meta["author"] == "Alice"


def test_similar_to_with_sentence_granularity(adapter_with_ai_content: MemoryAdapter) -> None:
    """Check SIMILAR_TO when selecting SENTENCE units with a LIMIT of 3.

    Only the hit count and the similarity scores are verified; the test does
    not inspect whether the returned units are actually single sentences.
    """
    engine = NLQL(adapter=adapter_with_ai_content)
    query = 'SELECT SENTENCE WHERE SIMILAR_TO("deep learning") > 0.3 LIMIT 3'

    hits = engine.execute(query)

    # Between one and three hits are expected (non-empty, capped by LIMIT).
    assert len(hits) > 0
    assert len(hits) <= 3

    # Each hit carries a similarity score above the query threshold.
    for hit in hits:
        meta = hit.metadata
        assert "similarity" in meta
        assert meta["similarity"] > 0.3


def test_similar_to_no_matches(adapter_with_ai_content: MemoryAdapter) -> None:
    """Check SIMILAR_TO with a 0.99 threshold on an off-topic query.

    The result is expected to be empty; if anything is returned anyway, each
    hit must genuinely score above 0.99.
    """
    engine = NLQL(adapter=adapter_with_ai_content)
    query = 'SELECT CHUNK WHERE SIMILAR_TO("quantum physics") > 0.99'

    hits = engine.execute(query)

    # An empty result passes outright; otherwise the threshold must hold
    # for every returned hit.
    if len(hits) != 0:
        assert all(hit.metadata["similarity"] > 0.99 for hit in hits)


def test_similar_to_complex_query(adapter_with_ai_content: MemoryAdapter) -> None:
    """Check a query combining SIMILAR_TO, a META filter, ORDER BY and LIMIT.

    NOTE: every assertion tolerates an empty result set, so the test checks
    the properties of returned hits rather than that hits exist.
    """
    query = '''
        SELECT CHUNK
        WHERE 
            SIMILAR_TO("machine learning and AI") > 0.4
            AND META("topic") != "Weather"
        ORDER BY SIMILARITY DESC
        LIMIT 3
    '''
    engine = NLQL(adapter=adapter_with_ai_content)

    hits = engine.execute(query)

    # LIMIT 3 caps the result size.
    assert len(hits) <= 3

    # Both WHERE conditions must hold for each hit.
    for hit in hits:
        meta = hit.metadata
        assert meta["similarity"] > 0.4
        assert meta["topic"] != "Weather"

    # Ordering is only meaningful when there are at least two hits.
    if len(hits) > 1:
        scores = [hit.metadata["similarity"] for hit in hits]
        descending = list(scores)
        descending.sort(reverse=True)
        assert scores == descending

