"""Tests for granularity transformation (SENTENCE and SPAN)."""

import pytest

from nlql import NLQL
from nlql.adapters import MemoryAdapter


@pytest.fixture
def adapter_with_text() -> MemoryAdapter:
    """Provide a MemoryAdapter pre-loaded with two multi-sentence chunks.

    The first chunk holds three sentences (author "Alice") and the second
    holds two (author "Bob"); both share the topic "test".
    """
    # Note: chunk_id is auto-generated as "chunk_0", "chunk_1", etc., so only
    # the text and the user metadata are supplied for each chunk.
    seeded_chunks = (
        (
            "First sentence. Second sentence. Third sentence.",
            {"topic": "test", "author": "Alice"},
        ),
        (
            "Another sentence. Yet another one.",
            {"topic": "test", "author": "Bob"},
        ),
    )

    store = MemoryAdapter()
    for text, user_metadata in seeded_chunks:
        store.add_text(text, user_metadata)
    return store


def test_select_sentence_basic(adapter_with_text: MemoryAdapter) -> None:
    """SELECT SENTENCE yields one result per sentence, in chunk order."""
    # Three sentences come from the first chunk, two from the second.
    expected_sentences = [
        "First sentence.",
        "Second sentence.",
        "Third sentence.",
        "Another sentence.",
        "Yet another one.",
    ]

    engine = NLQL(adapter=adapter_with_text)
    sentences = engine.execute("SELECT SENTENCE")

    assert len(sentences) == 5

    # Compare position by position so ordering is verified as well.
    for position, expected in enumerate(expected_sentences):
        assert sentences[position].content == expected


def test_select_sentence_preserves_metadata(adapter_with_text: MemoryAdapter) -> None:
    """SELECT SENTENCE carries the parent chunk's user metadata onto each sentence.

    Two things are checked:
    - business metadata supplied by the user (topic, author) is still present
      in each result's ``metadata`` dict;
    - system-managed keys (source_chunk_id, chunk_id) do not appear in the
      ``metadata`` dict of the first result.
    """
    engine = NLQL(adapter=adapter_with_text)
    sentences = engine.execute("SELECT SENTENCE")

    # Both chunks were stored with the same topic, so every sentence has it.
    for sentence in sentences:
        assert sentence.metadata.get("topic") == "test"

    # The author follows the parent chunk: three sentences from Alice's
    # chunk, then two from Bob's.
    expected_authors = ["Alice", "Alice", "Alice", "Bob", "Bob"]
    for position, author in enumerate(expected_authors):
        assert sentences[position].metadata.get("author") == author

    # metadata should only contain the user's business data, never the
    # system-managed identifiers.
    first_metadata = sentences[0].metadata
    for system_key in ("source_chunk_id", "chunk_id"):
        assert system_key not in first_metadata


def test_select_sentence_system_fields() -> None:
    """Test that SELECT SENTENCE keeps system fields out of user metadata.

    System-managed fields (source_chunk_id, chunk_id) must not leak into the
    ``metadata`` dict, which is reserved for the user's business data.

    Only the Result-level view is verified here. Checking that the fields are
    present as attributes on the underlying units would require access to
    those units and is not covered by this test.
    """
    adapter = MemoryAdapter()
    adapter.add_text("First sentence. Second sentence.", {"author": "Alice"})
    adapter.add_text("Third sentence.", {"author": "Bob"})

    nlql = NLQL(adapter=adapter)
    results = nlql.execute("SELECT SENTENCE")

    # All 3 sentences should exist (2 from the first chunk, 1 from the second)
    assert len(results) == 3

    # User metadata should be preserved from each sentence's parent chunk
    assert results[0].metadata.get("author") == "Alice"
    assert results[1].metadata.get("author") == "Alice"
    assert results[2].metadata.get("author") == "Bob"

    # System fields should NOT pollute user's metadata
    assert "source_chunk_id" not in results[0].metadata
    assert "chunk_id" not in results[0].metadata


def test_select_sentence_with_where(adapter_with_text: MemoryAdapter) -> None:
    """A WHERE CONTAINS filter narrows SELECT SENTENCE to matching sentences."""
    query = 'SELECT SENTENCE WHERE CONTAINS("First")'
    matches = NLQL(adapter=adapter_with_text).execute(query)

    # Exactly one sentence in the fixture contains "First".
    assert len(matches) == 1

    only_match = matches[0]
    assert only_match.content == "First sentence."


def test_select_sentence_with_limit(adapter_with_text: MemoryAdapter) -> None:
    """LIMIT caps SELECT SENTENCE at the first N sentences."""
    engine = NLQL(adapter=adapter_with_text)
    limited = engine.execute("SELECT SENTENCE LIMIT 2")

    assert len(limited) == 2

    # The two kept results are the leading sentences of the first chunk.
    for position, expected in enumerate(["First sentence.", "Second sentence."]):
        assert limited[position].content == expected


def test_select_span_sentence_basic(adapter_with_text: MemoryAdapter) -> None:
    """SELECT SPAN(SENTENCE, window=1) returns one span per sentence."""
    engine = NLQL(adapter=adapter_with_text)
    spans = engine.execute("SELECT SPAN(SENTENCE, window=1)")

    # One span is produced for each of the five sentences in the fixture.
    assert len(spans) == 5

    # A span's content is its target sentence. Only that target is asserted
    # here; the surrounding context sentences are not inspected.
    leading_span = spans[0]
    assert leading_span.content == "First sentence."

    middle_span = spans[1]
    assert middle_span.content == "Second sentence."


def test_select_span_sentence_window_size(adapter_with_text: MemoryAdapter) -> None:
    """The number of spans does not depend on the window size."""
    engine = NLQL(adapter=adapter_with_text)

    # window=0 means no context; window=2 asks for a wider one. Either way
    # there should be one span per sentence.
    window_queries = (
        "SELECT SPAN(SENTENCE, window=0)",
        "SELECT SPAN(SENTENCE, window=2)",
    )
    for query in window_queries:
        spans = engine.execute(query)
        assert len(spans) == 5


def test_select_span_chunk_basic() -> None:
    """SELECT SPAN(CHUNK, window=1) returns one span per stored chunk."""
    seeded_chunks = [
        ("Chunk 1", {"id": 1}),
        ("Chunk 2", {"id": 2}),
        ("Chunk 3", {"id": 3}),
    ]

    store = MemoryAdapter()
    for text, user_metadata in seeded_chunks:
        store.add_text(text, user_metadata)

    spans = NLQL(adapter=store).execute("SELECT SPAN(CHUNK, window=1)")

    # Three chunks were stored, so three spans are expected.
    assert len(spans) == 3

    # Each span's content is the text of its target chunk, in insertion order.
    for position, (text, _) in enumerate(seeded_chunks):
        assert spans[position].content == text


def test_select_span_with_where(adapter_with_text: MemoryAdapter) -> None:
    """A WHERE CONTAINS filter narrows SELECT SPAN to the matching target."""
    query = 'SELECT SPAN(SENTENCE, window=1) WHERE CONTAINS("Second")'
    matches = NLQL(adapter=adapter_with_text).execute(query)

    # Only the span targeting "Second sentence." should be returned.
    assert len(matches) == 1

    only_match = matches[0]
    assert only_match.content == "Second sentence."


def test_select_span_with_limit(adapter_with_text: MemoryAdapter) -> None:
    """LIMIT caps the number of spans returned by SELECT SPAN."""
    query = "SELECT SPAN(SENTENCE, window=1) LIMIT 2"
    limited = NLQL(adapter=adapter_with_text).execute(query)

    # Five spans exist without the limit; only two may come back.
    assert len(limited) == 2


def test_select_chunk_no_transformation(adapter_with_text: MemoryAdapter) -> None:
    """SELECT CHUNK returns the stored chunks without splitting them."""
    engine = NLQL(adapter=adapter_with_text)
    chunks = engine.execute("SELECT CHUNK")

    # Two chunks were stored; no sentence splitting should take place.
    assert len(chunks) == 2

    # Each chunk still contains its full multi-sentence text.
    expected_texts = [
        "First sentence. Second sentence. Third sentence.",
        "Another sentence. Yet another one.",
    ]
    for position, full_text in enumerate(expected_texts):
        assert full_text in chunks[position].content

