"""Tests for sentence splitting functionality."""

import pytest

from nlql import NLQL
from nlql.adapters import MemoryAdapter
from nlql.text.splitting import default_sentence_splitter


def test_default_sentence_splitter_english() -> None:
    """Check that English text is split into its '.', '!' and '?' terminated sentences."""
    expected = ["Hello world.", "This is a test!", "How are you?", "Fine."]

    pieces = default_sentence_splitter("Hello world. This is a test! How are you? Fine.")

    # Compare element by element so the check does not depend on the
    # concrete sequence type the splitter returns.
    assert len(pieces) == 4
    for position, sentence in enumerate(expected):
        assert pieces[position] == sentence


def test_default_sentence_splitter_chinese() -> None:
    """Check that Chinese text is split on the full-width terminators 。 ！ and ？."""
    expected = ["你好世界。", "这是一个测试！", "你好吗？", "很好。"]

    pieces = default_sentence_splitter("你好世界。这是一个测试！你好吗？很好。")

    # Each sentence is expected to keep its trailing terminator.
    assert len(pieces) == 4
    for position, sentence in enumerate(expected):
        assert pieces[position] == sentence


def test_default_sentence_splitter_mixed() -> None:
    """Check splitting of text that interleaves English and Chinese sentences."""
    expected = ["Hello.", "你好。", "World!", "世界！"]

    pieces = default_sentence_splitter("Hello. 你好。World! 世界！")

    # ASCII and full-width terminators must both end a sentence, and the
    # separating space after "Hello." must not leak into the next sentence.
    assert len(pieces) == 4
    for position, sentence in enumerate(expected):
        assert pieces[position] == sentence


def test_default_sentence_splitter_no_spaces() -> None:
    """Check splitting when terminators are not followed by whitespace."""
    expected = ["First.", "Second!", "Third?", "Fourth"]

    pieces = default_sentence_splitter("First.Second!Third?Fourth")

    # The trailing "Fourth" has no terminator but must still be returned
    # as its own sentence.
    assert len(pieces) == 4
    for position, sentence in enumerate(expected):
        assert pieces[position] == sentence


def test_select_sentence_granularity() -> None:
    """Check that SELECT SENTENCE returns one SENTENCE-unit result per sentence."""
    store = MemoryAdapter()
    store.add_text("First sentence. Second sentence. Third sentence.")

    rows = NLQL(adapter=store).execute("SELECT SENTENCE")

    assert len(rows) == 3
    expected_contents = ("First sentence.", "Second sentence.", "Third sentence.")
    for position, text in enumerate(expected_contents):
        assert rows[position].content == text
    # Every returned unit must be tagged with the requested granularity.
    for row in rows:
        assert row.unit == "SENTENCE"


def test_select_sentence_with_where_clause() -> None:
    """Check that a WHERE CONTAINS clause filters the sentence-level results."""
    store = MemoryAdapter()
    store.add_text("AI agents are autonomous. Machine learning is powerful. Deep learning works.")
    query = """
        SELECT SENTENCE
        WHERE CONTAINS("learning")
    """

    matches = NLQL(adapter=store).execute(query)

    # Only the two sentences mentioning "learning" should remain, in the
    # order they appear in the stored text.
    assert len(matches) == 2
    expected_fragments = ("Machine learning", "Deep learning")
    for position, fragment in enumerate(expected_fragments):
        assert fragment in matches[position].content


def test_select_sentence_chinese_with_where() -> None:
    """Check SELECT SENTENCE, with and without WHERE, on a Chinese document."""
    # Fixture: six 。-terminated sentences, three of which mention "pst".
    document = (
        "2014年,群主(昵称:pst、凛子)加入百度贴吧易语言吧。"
        "次年开始担任小吧主,四年后又辞职。"
        "2017年,我爱易语言论坛(即爱易论坛)创立,后停止维护。"
        "论坛子版块之一'爱易咖啡厅',是其灌水专区,pst为版主。"
        "2019年,本群建立。"
        "本群成员之大半壁江山来自pst在易语言吧的朋友圈,另一小半则来自ddi的朋友圈。"
    )
    store = MemoryAdapter()
    store.add_document(document)
    engine = NLQL(adapter=store)

    # Unfiltered query: at least one result per sentence of the fixture.
    every_sentence = engine.execute("SELECT SENTENCE")
    assert len(every_sentence) >= 6

    # Keyword filter: only the sentences mentioning "pst" may come back.
    pst_hits = engine.execute("""
        SELECT SENTENCE
        WHERE CONTAINS("pst")
    """)
    assert len(pst_hits) == 3
    for hit in pst_hits:
        assert "pst" in hit.content

    # Each expected sentence is identified by two fragments that must
    # occur together in a single result.
    texts = [hit.content for hit in pst_hits]
    expected_fragment_pairs = (
        ("2014年", "pst、凛子"),
        ("爱易咖啡厅", "pst为版主"),
        ("朋友圈", "pst在易语言吧"),
    )
    for marker, detail in expected_fragment_pairs:
        assert any(marker in text and detail in text for text in texts)


def test_select_sentence_preserves_metadata() -> None:
    """Check that each sentence result carries the metadata of its parent text."""
    store = MemoryAdapter()
    store.add_text("First sentence. Second sentence.", {"author": "Alice", "topic": "AI"})

    rows = NLQL(adapter=store).execute("SELECT SENTENCE")

    assert len(rows) == 2
    # Expectations are kept separate from the dict handed to the adapter so
    # the assertions never alias an object the adapter owns.
    expected_fields = (("author", "Alice"), ("topic", "AI"))
    for position in (0, 1):
        for key, value in expected_fields:
            assert rows[position].metadata.get(key) == value


def test_select_sentence_empty_text() -> None:
    """Check that whitespace-only text produces no sentence results."""
    store = MemoryAdapter()
    whitespace_only = "   "
    store.add_text(whitespace_only)

    rows = NLQL(adapter=store).execute("SELECT SENTENCE")

    # No sentence should be produced from text without any content.
    assert len(rows) == 0


def test_select_sentence_single_sentence() -> None:
    """Check that text with a single, unterminated sentence is returned unchanged."""
    only_sentence = "This is a single sentence"
    store = MemoryAdapter()
    store.add_text(only_sentence)

    rows = NLQL(adapter=store).execute("SELECT SENTENCE")

    # The lone sentence has no terminator and must come back verbatim.
    assert len(rows) == 1
    assert rows[0].content == "This is a single sentence"

