"""In-memory adapter for testing and simple use cases."""

import re
from typing import Any

from nlql.adapters.base import BaseAdapter, QueryPlan
from nlql.text.units import Chunk, TextUnit


class MemoryAdapter(BaseAdapter):
    """Simple in-memory adapter for testing and prototyping.

    This adapter stores chunks in a plain Python list and performs no
    filtering of its own. It's useful for testing, demonstrations, and small
    datasets without requiring a vector database.

    The adapter provides convenient methods for adding data:
    - add_chunk(): Add a single chunk with metadata
    - add_text(): Add text (automatically creates a chunk)
    - add_texts(): Batch add multiple texts
    - add_document(): Add a document with automatic chunking

    Example:
        >>> adapter = MemoryAdapter()
        >>> adapter.add_text("AI agents are autonomous systems", {"topic": "AI"})
        >>> adapter.add_text("Machine learning powers modern AI", {"topic": "ML"})
        >>>
        >>> from nlql import NLQL
        >>> nlql = NLQL(adapter=adapter)
        >>> results = nlql.execute("SELECT CHUNK LIMIT 10")
    """

    def __init__(self) -> None:
        """Initialize an empty memory adapter.

        Use add_chunk(), add_text(), add_texts(), or add_document() to
        populate data.
        """
        # Chunks are kept in insertion order; a chunk's ``position`` is its
        # index in this list at the time it was added.
        self._chunks: list[Chunk] = []

    def query(self, plan: QueryPlan) -> list[TextUnit]:  # noqa: ARG002
        """Execute a simple in-memory query.

        Note: MemoryAdapter is a simple adapter that returns all chunks.
        Filtering, ordering, and limiting are handled by the Executor.

        In the future, if plan.filters or plan.query_text are provided,
        this method could apply optimizations, but for now it returns
        all data and lets the executor handle the rest.

        Args:
            plan: Query plan (currently unused for MemoryAdapter)

        Returns:
            A new list containing every stored chunk, in insertion order.
        """
        # Return a shallow copy so callers can reorder or truncate the result
        # without disturbing the adapter's own storage.
        return self._chunks.copy()

    def supports_semantic_search(self) -> bool:
        """Memory adapter does not support semantic search.

        Semantic search requires embeddings and similarity computation,
        which is not implemented in the basic MemoryAdapter.
        """
        return False

    def supports_metadata_filter(self) -> bool:
        """Memory adapter does not push down metadata filters.

        While metadata filtering is supported by NLQL, the MemoryAdapter
        returns all chunks and lets the Executor handle filtering.
        This method returns False to indicate no pushdown optimization.
        """
        return False

    def add_chunk(
        self,
        content: str,
        metadata: dict[str, Any] | None = None,
        chunk_id: str | None = None,
    ) -> str:
        """Add a single chunk to the memory store.

        Args:
            content: Chunk content
            metadata: Optional metadata dictionary. A shallow copy is stored,
                so later changes to the caller's dictionary do not affect the
                stored chunk.
            chunk_id: Optional custom chunk ID. If not provided, an ID of the
                form ``chunk_<n>`` is generated, where ``n`` is the number of
                chunks stored at the time of the call.

        Returns:
            The chunk ID of the added chunk

        Example:
            >>> adapter = MemoryAdapter()
            >>> chunk_id = adapter.add_chunk(
            ...     "AI agents are autonomous",
            ...     metadata={"topic": "AI", "date": "2024-01-01"}
            ... )
        """
        position = len(self._chunks)
        if chunk_id is None:
            chunk_id = f"chunk_{position}"

        self._chunks.append(
            Chunk(
                content=content,
                # Copy instead of aliasing: the same dict may be reused by the
                # caller for several chunks or mutated after this call.
                metadata=dict(metadata) if metadata else {},
                chunk_id=chunk_id,
                position=position,
            )
        )
        return chunk_id

    def add_text(self, text: str, metadata: dict[str, Any] | None = None) -> str:
        """Add a single text as a chunk.

        This is a convenience method equivalent to add_chunk() with an
        auto-generated chunk ID.

        Args:
            text: Text content
            metadata: Optional metadata dictionary

        Returns:
            The chunk ID of the added chunk

        Example:
            >>> adapter = MemoryAdapter()
            >>> adapter.add_text("AI agents are autonomous systems")
            >>> adapter.add_text("Machine learning powers AI", {"topic": "ML"})
        """
        return self.add_chunk(text, metadata)

    def add_texts(
        self,
        texts: list[str],
        metadatas: list[dict[str, Any]] | None = None,
    ) -> list[str]:
        """Batch add multiple texts as chunks.

        Args:
            texts: List of text contents
            metadatas: Optional list of metadata dictionaries (must match length of texts)

        Returns:
            List of chunk IDs for the added chunks, in the order of ``texts``

        Raises:
            ValueError: If metadatas length doesn't match texts length

        Example:
            >>> adapter = MemoryAdapter()
            >>> texts = [
            ...     "AI agents are autonomous",
            ...     "Machine learning powers AI",
            ...     "NLP enables text understanding"
            ... ]
            >>> metadatas = [
            ...     {"topic": "AI"},
            ...     {"topic": "ML"},
            ...     {"topic": "NLP"}
            ... ]
            >>> chunk_ids = adapter.add_texts(texts, metadatas)
        """
        if metadatas is None:
            # No metadata supplied: pair every text with None.
            per_text: list[dict[str, Any] | None] = [None] * len(texts)
        elif len(metadatas) != len(texts):
            raise ValueError(
                f"metadatas length ({len(metadatas)}) must match texts length ({len(texts)})"
            )
        else:
            per_text = list(metadatas)

        chunk_ids = []
        for text, metadata in zip(texts, per_text):
            chunk_ids.append(self.add_text(text, metadata))

        return chunk_ids

    def add_document(
        self,
        document: str,
        metadata: dict[str, Any] | None = None,
        chunk_size: int = 500,
        chunk_overlap: int = 50,
        separator: str = "\n\n",
    ) -> list[str]:
        """Add a document with automatic chunking.

        The document is split on ``separator`` and the resulting segments are
        packed into chunks of roughly ``chunk_size`` characters. Each chunk
        inherits the document's metadata plus two additional fields:
        'chunk_index' (0-based index of the chunk within the document) and
        'total_chunks' (number of chunks produced for the document).

        Args:
            document: Full document text
            metadata: Optional metadata for the document (inherited by all chunks)
            chunk_size: Target size for each chunk (in characters)
            chunk_overlap: Number of characters to overlap between chunks
            separator: Separator to use for splitting (default: paragraph breaks)

        Returns:
            List of chunk IDs for the created chunks (empty if the document
            contains no non-whitespace text)

        Example:
            >>> adapter = MemoryAdapter()
            >>> long_text = "..." # Long document
            >>> chunk_ids = adapter.add_document(
            ...     long_text,
            ...     metadata={"source": "paper.pdf", "author": "Alice"},
            ...     chunk_size=500,
            ...     chunk_overlap=50
            ... )
        """
        # Simple chunking strategy: split by separator, then pack segments
        # into chunks up to the target size.
        pieces = self._chunk_text(document, chunk_size, chunk_overlap, separator)
        total = len(pieces)
        base_metadata = metadata or {}

        chunk_ids = []
        for index, piece in enumerate(pieces):
            # Build a fresh dict per chunk so chunks never share metadata.
            chunk_metadata = {
                **base_metadata,
                "chunk_index": index,
                "total_chunks": total,
            }
            chunk_ids.append(self.add_chunk(piece, chunk_metadata))

        return chunk_ids

    def _chunk_text(
        self,
        text: str,
        chunk_size: int,
        chunk_overlap: int,
        separator: str,
    ) -> list[str]:
        """Split text into chunks.

        The text is split on ``separator``; each segment is stripped and empty
        segments are dropped. Segments are then joined with single spaces into
        a chunk until adding the next segment (including the joining space)
        would exceed ``chunk_size``.

        Limitations of this simple strategy:
        - A single segment longer than ``chunk_size`` is kept whole.
        - The overlap prefix is added on top of the next segment, so a chunk
          that starts with an overlap may be longer than ``chunk_size``.

        Args:
            text: Text to split
            chunk_size: Target chunk size
            chunk_overlap: Overlap between chunks; when positive and shorter
                than the chunk just emitted, that many trailing characters are
                prepended to the next chunk
            separator: Separator for initial split; an empty separator means
                the text is treated as one segment

        Returns:
            List of text chunks
        """
        # str.split("") raises ValueError, so an empty separator skips the
        # initial split instead of crashing.
        segments = text.split(separator) if separator else [text]

        chunks: list[str] = []
        current = ""

        for raw_segment in segments:
            segment = raw_segment.strip()
            if not segment:
                continue

            if not current:
                current = segment
            elif len(current) + 1 + len(segment) > chunk_size:
                # The "+ 1" accounts for the joining space, so an accumulated
                # chunk never ends up one character over the target size.
                chunks.append(current)

                # Start the next chunk, carrying over the tail of the one just
                # emitted when an overlap was requested and is available.
                if 0 < chunk_overlap < len(current):
                    current = f"{current[-chunk_overlap:]} {segment}"
                else:
                    current = segment
            else:
                current = f"{current} {segment}"

        # Add the last chunk
        if current:
            chunks.append(current)

        return chunks

    def clear(self) -> None:
        """Clear all chunks from the adapter.

        Example:
            >>> adapter = MemoryAdapter()
            >>> adapter.add_text("Some text")
            >>> adapter.clear()
            >>> len(adapter) == 0
            True
        """
        self._chunks.clear()

    def __len__(self) -> int:
        """Get the number of chunks in the adapter."""
        return len(self._chunks)

    def __repr__(self) -> str:
        """String representation of the adapter."""
        return f"MemoryAdapter(chunks={len(self._chunks)})"

