"""Text unit abstractions for different granularities."""

from dataclasses import dataclass, field
from typing import Any


@dataclass
class TextUnit:
    """Base class for text units at different granularities.

    Attributes:
        content: The text content
        metadata: Associated metadata
        source_id: Optional identifier for the source
    """

    content: str
    metadata: dict[str, Any] = field(default_factory=dict)
    source_id: str | None = None

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(content={self.content[:50]}...)"


@dataclass(repr=False)
class Document(TextUnit):
    """Represents a complete document.

    Adds no fields beyond those of ``TextUnit``.

    ``repr=False`` stops ``@dataclass`` from generating a field-by-field
    ``__repr__`` for this subclass, so the content-truncating ``__repr__``
    defined on ``TextUnit`` is inherited instead.
    """


@dataclass(repr=False)
class Chunk(TextUnit):
    """Represents a chunk (typical vector database storage unit).

    ``repr=False`` stops ``@dataclass`` from generating a field-by-field
    ``__repr__`` for this subclass, so the content-truncating ``__repr__``
    defined on ``TextUnit`` is inherited instead.

    Attributes:
        chunk_id: Optional chunk identifier
        position: Position within the source document
    """

    chunk_id: str | None = None
    position: int | None = None


@dataclass(repr=False)
class Sentence(TextUnit):
    """Represents a single sentence.

    ``repr=False`` stops ``@dataclass`` from generating a field-by-field
    ``__repr__`` for this subclass, so the content-truncating ``__repr__``
    defined on ``TextUnit`` is inherited instead.

    Attributes:
        sentence_index: Index within the parent chunk/document
        source_chunk_id: ID of the parent chunk this sentence came from
    """

    sentence_index: int | None = None
    source_chunk_id: str | None = None


@dataclass(repr=False)
class Span(TextUnit):
    """Represents a span with context window.

    ``repr=False`` stops ``@dataclass`` from generating a field-by-field
    ``__repr__`` for this subclass (which would print every nested context
    unit in full), so the content-truncating ``__repr__`` defined on
    ``TextUnit`` is inherited instead.

    Attributes:
        target_unit: The primary target unit
        context_before: Units before the target
        context_after: Units after the target
        window_size: Size of the context window
    """

    target_unit: TextUnit | None = None
    # Mutable defaults go through default_factory so instances never share a list.
    context_before: list[TextUnit] = field(default_factory=list)
    context_after: list[TextUnit] = field(default_factory=list)
    window_size: int = 0

