"""
Dynamic Natural Language Query Parser with Rule-Based and LLM Fallback

Provides intelligent query parsing that adapts to dynamic schema:
1. Rule-based parsing for common patterns (fast, reliable)
2. LLM fallback for complex/ambiguous queries (flexible, powerful)

Example:
    >>> parser = DynamicQueryParser(store)
    >>> query = parser.parse("people older than 30 from USA")
    >>> results = query.execute()
"""

import logging
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

from .graph_query import FilterOperator, GraphQuery
from .meta import MetaPropertyType, PropValueType

if TYPE_CHECKING:
    from .element_store import ElementStore
    from .meta import MetaNode

logger = logging.getLogger(__name__)


class PropertyTypeCategory(Enum):
    """Coarse grouping of ``PropValueType`` members used by the rule matcher.

    Each member's value is the set of ``PropValueType`` members that belong
    to that group.
    """

    NUMERIC = {PropValueType.NUMBER}
    TEMPORAL = {PropValueType.DATE, PropValueType.DATETIME}
    BOOLEAN = {PropValueType.BOOLEAN}
    TEXT = {PropValueType.STRING}
    OPTIONS = {PropValueType.OPTIONS}
    REFERENCE = {PropValueType.ELEMENT, PropValueType.ELEMENT_ARRAY}
    STRUCTURED = {PropValueType.JSON, PropValueType.LISTITEM}

    @classmethod
    def categorize(cls, prop_type: PropValueType) -> "PropertyTypeCategory":
        """Return the category whose value set contains ``prop_type``.

        Members are scanned in declaration order; ``TEXT`` is returned when
        no member's set contains ``prop_type``.
        """
        return next(
            (member for member in cls if prop_type in member.value),
            cls.TEXT,
        )

    @classmethod
    def compatible_operators(
        cls, category: "PropertyTypeCategory"
    ) -> List[FilterOperator]:
        """Return the filter operators that make sense for ``category``.

        A fresh list is built on every call. Categories missing from the
        table yield ``[FilterOperator.EQ]``.
        """
        # Shared operator sets; copied per category so no two entries alias.
        ordered = [
            FilterOperator.EQ,
            FilterOperator.GT,
            FilterOperator.LT,
            FilterOperator.GTE,
            FilterOperator.LTE,
            FilterOperator.BETWEEN,
        ]
        membership = [FilterOperator.EQ, FilterOperator.IN]

        table = {
            cls.NUMERIC: list(ordered),
            cls.TEMPORAL: list(ordered),
            cls.BOOLEAN: [FilterOperator.EQ, FilterOperator.NOT],
            cls.TEXT: [
                FilterOperator.EQ,
                FilterOperator.CONTAINS,
                FilterOperator.STARTS_WITH,
                FilterOperator.ENDS_WITH,
                FilterOperator.REGEX,
            ],
            cls.OPTIONS: list(membership),
            cls.REFERENCE: list(membership),
            cls.STRUCTURED: [FilterOperator.CONTAINS, FilterOperator.REGEX],
        }

        try:
            return table[category]
        except KeyError:
            return [FilterOperator.EQ]


@dataclass
class DynamicExtraction:
    """Result of dynamic query extraction with confidence scores.

    Every field has a default, so an empty ``DynamicExtraction()`` represents
    "nothing could be extracted".
    """

    # Query components
    class_id: Optional[str] = None
    property_name: Optional[str] = None
    operator: Optional["FilterOperator"] = None
    value: Optional[Any] = None
    text_search: Optional[str] = None
    sort_property: Optional[str] = None
    sort_direction: str = "asc"
    limit: Optional[int] = None

    # Confidence tracking
    confidence: float = 0.0
    requires_llm: bool = False
    llm_reason: Optional[str] = None
    # Names of the extraction rules that fired; each instance gets its own list.
    applied_rules: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Normalise an explicit ``applied_rules=None`` to an empty list."""
        # The field previously defaulted to None, so callers may still pass
        # None explicitly; keep accepting it.
        if self.applied_rules is None:
            self.applied_rules = []


class SchemaAnalyzer:
    """Analyzes metanodes to provide schema-aware extraction rules.

    Metanodes are the store elements whose ``class_id`` is ``"meta"``; a
    metanode's ``id`` is the class ID it describes. Lookups and alias tables
    are cached per class ID for the lifetime of the analyzer.
    """

    def __init__(self, store: "ElementStore"):
        """Initialize analyzer with element store."""
        self.store = store
        self.logger = logging.getLogger(__name__)
        self._metanode_cache: Dict[str, "MetaNode"] = {}
        self._property_aliases: Dict[str, Dict[str, str]] = {}

    def get_metanode(self, class_id: str) -> Optional["MetaNode"]:
        """Return the metanode describing ``class_id``, or None if absent.

        Successful lookups are cached; misses are not, so a metanode added to
        the store later will still be found.
        """
        cached = self._metanode_cache.get(class_id)
        if cached is not None:
            return cached

        # Linear scan of the store for the element that is a metanode
        # (class_id == "meta") and whose id equals the requested class.
        for element in self.store.all_elements():
            if (
                hasattr(element, "class_id")
                and element.class_id == "meta"
                and hasattr(element, "id")
                and element.id == class_id
            ):
                self._metanode_cache[class_id] = element
                return element

        return None

    def get_property_type(
        self, class_id: str, property_name: str
    ) -> Optional["MetaPropertyType"]:
        """Return the type definition of ``property_name`` on ``class_id``.

        The name is first looked up directly, then through the alias table
        built by ``_get_property_aliases``. Returns None when the class has
        no metanode, the metanode declares no property types, or neither the
        name nor its alias is declared.
        """
        metanode = self.get_metanode(class_id)
        if not metanode:
            return None

        # Bind prop_types unconditionally: a metanode whose properties object
        # has no ``property_types`` attribute must yield None rather than
        # hitting an unbound local in the alias lookup below.
        prop_types = getattr(metanode.properties, "property_types", None)
        if not prop_types:
            return None

        # Direct property lookup
        if property_name in prop_types:
            return prop_types[property_name]

        # Alias lookup
        aliases = self._get_property_aliases(class_id)
        canonical_name = aliases.get(property_name, property_name)
        if canonical_name in prop_types:
            return prop_types[canonical_name]

        return None

    def get_all_property_names(self, class_id: str) -> List[str]:
        """Return the declared property names of ``class_id`` (possibly empty)."""
        metanode = self.get_metanode(class_id)
        if not metanode or not hasattr(metanode.properties, "property_types"):
            return []

        prop_types = metanode.properties.property_types
        return list(prop_types.keys()) if prop_types else []

    def _get_property_aliases(self, class_id: str) -> Dict[str, str]:
        """Return a cached ``alias -> property name`` table for ``class_id``.

        Aliases are derived by substring heuristics on each property name:
        names containing "age" get the aliases "age"/"years_old", names
        containing "date" or "created" get "date"/"when", and names
        containing "name" get "name"/"title". A property named exactly
        "active" additionally gets "active"/"inactive". When several
        properties qualify for the same alias, the last one wins.
        """
        if class_id in self._property_aliases:
            return self._property_aliases[class_id]

        aliases: Dict[str, str] = {}

        for prop_name in self.get_all_property_names(class_id):
            # Only the first matching family applies to a given property.
            # NOTE(review): the substring test also matches names such as
            # "message" or "page" for "age" - confirm this is acceptable.
            if "age" in prop_name:
                aliases["age"] = prop_name
                aliases["years_old"] = prop_name
            elif "date" in prop_name or "created" in prop_name:
                aliases["date"] = prop_name
                aliases["when"] = prop_name
            elif "name" in prop_name:
                aliases["name"] = prop_name
                aliases["title"] = prop_name

            # Boolean active/inactive heuristics
            if prop_name == "active":
                aliases["active"] = prop_name
                aliases["inactive"] = prop_name

        self._property_aliases[class_id] = aliases
        return aliases


class RuleBasedExtractor:
    """Rule-based extraction for common query patterns.

    A set of precompiled regular expressions recognises numeric and date
    comparisons, boolean markers, quoted text, sorting and limits. Which
    value rules run depends on the type of the detected property.
    """

    def __init__(self, schema_analyzer: "SchemaAnalyzer"):
        """Initialize with schema analyzer and compile the rule patterns."""
        self.analyzer = schema_analyzer
        self.logger = logging.getLogger(__name__)

        # A well-formed integer or decimal literal. A bare "." or a trailing
        # sentence period is never captured, so float()/int() cannot fail on
        # the captured text.
        number = r"([0-9]+(?:\.[0-9]+)?|\.[0-9]+)"
        # YYYY, YYYY-MM or YYYY-MM-DD.
        date = r"([0-9]{4}(?:-[0-9]{2}){0,2})"
        # Group 1 is the opening quote, group 2 the shortest text up to the
        # matching closing quote. A backreference cannot be used inside a
        # character class, hence the negative lookahead.
        quoted = r"([\"'])((?:(?!\1).)+)\1"
        text_flags = re.IGNORECASE | re.DOTALL

        # Precompiled patterns
        self.patterns = {
            # Numeric comparisons
            "numeric_gt": re.compile(
                r"(?:older|greater|more|larger|higher|above|over|exceeds?)\s+than\s+"
                + number,
                re.IGNORECASE,
            ),
            "numeric_gte": re.compile(
                r"(?:at\s+least|minimum|no\s+less\s+than|≥)\s+" + number,
                re.IGNORECASE,
            ),
            "numeric_lt": re.compile(
                r"(?:younger|less|smaller|lower|below|under)\s+than\s+" + number,
                re.IGNORECASE,
            ),
            "numeric_lte": re.compile(
                r"(?:at\s+most|maximum|no\s+more\s+than|≤)\s+" + number,
                re.IGNORECASE,
            ),
            # Date patterns
            "date_before": re.compile(
                r"(?:before|prior to)\s+" + date,
                re.IGNORECASE,
            ),
            "date_after": re.compile(r"(?:after|since)\s+" + date, re.IGNORECASE),
            # Boolean patterns; the leading \b stops "inactive" matching
            # "active" and "casino" matching "no".
            "boolean_true": re.compile(
                r"\b(?:is|has|was|active|enabled)\b", re.IGNORECASE
            ),
            "boolean_false": re.compile(
                r"\b(?:is\s+)?(?:false|inactive|no|disabled|not\s+active)\b",
                re.IGNORECASE,
            ),
            # Text patterns
            "text_contains": re.compile(
                r"(?:contain(?:s|ing)?|with|includes?|has|search|looking\s+for)"
                r"\s+(?:the\s+)?" + quoted,
                text_flags,
            ),
            "text_starts": re.compile(
                r"(?:starting\s+with|starts?\s+with|begins?(?:\s+with)?)\s+"
                + quoted,
                text_flags,
            ),
            # Limit/offset
            "limit": re.compile(
                r"(?:first|top|limit|take|max)\s+([0-9]+)",
                re.IGNORECASE,
            ),
            # Sorting ("sort", "sorted", "order", "ordered")
            "sort": re.compile(
                r"(?:sort(?:ed)?|order(?:ed)?)\s+(?:by\s+)?(\w+)"
                r"(?:\s+(ascending|descending|asc|desc))?",
                re.IGNORECASE,
            ),
        }

    def extract(
        self, query: str, class_id: str, context: Optional[str] = None  # noqa: ARG002
    ) -> "DynamicExtraction":
        """
        Extract query components using rules.

        The query is lower-cased before matching, so extracted text values
        are lower-case as well.

        Args:
            query: Natural language query
            class_id: Target class ID
            context: Optional additional context (currently unused)

        Returns:
            DynamicExtraction with confidence and applied rules
        """
        extraction = DynamicExtraction(class_id=class_id)
        query_lower = query.lower()

        # Rule 1: Try to detect property name
        property_name = self._detect_property(query_lower, class_id)
        if property_name:
            extraction.property_name = property_name
            extraction.applied_rules.append(f"property_detection:{property_name}")

        # Rule 2: Extract value and operator based on property type
        if extraction.property_name:
            prop_type = self.analyzer.get_property_type(
                class_id, extraction.property_name
            )
            if prop_type:
                operator, value = self._extract_by_type(
                    query_lower, prop_type, extraction.property_name
                )
                if operator and value is not None:
                    extraction.operator = operator
                    extraction.value = value
                    extraction.applied_rules.append(
                        f"type_aware_extraction:{prop_type.type.value}"
                    )
                else:
                    extraction.requires_llm = True
                    extraction.llm_reason = (
                        f"Could not extract value for {prop_type.type.value} property"
                    )
            else:
                extraction.requires_llm = True
                extraction.llm_reason = f"Unknown property: {extraction.property_name}"

        # Rule 3: Text search (works with any class)
        text_match = self.patterns["text_contains"].search(query_lower)
        if text_match:
            extraction.text_search = text_match.group(2)
            extraction.applied_rules.append("text_search")

        # Rule 4: Sorting
        sort_match = self.patterns["sort"].search(query_lower)
        if sort_match:
            extraction.sort_property = sort_match.group(1)
            direction = sort_match.group(2)
            extraction.sort_direction = (
                "desc" if direction and "desc" in direction.lower() else "asc"
            )
            extraction.applied_rules.append("sort_extraction")

        # Rule 5: Limit
        limit_match = self.patterns["limit"].search(query_lower)
        if limit_match:
            extraction.limit = int(limit_match.group(1))
            extraction.applied_rules.append("limit_extraction")

        # Confidence is the fraction of the five rules that fired.
        extraction.confidence = len(extraction.applied_rules) / 5.0  # Max 5 rules

        return extraction

    def _detect_property(self, query: str, class_id: str) -> Optional[str]:
        """Detect property name from query using metanode info.

        Tried in order: keyword heuristics for common properties, a
        whole-word match on a declared property name, then a whole-word
        match on an alias. Returns None when nothing matches.
        """
        available_props = self.analyzer.get_all_property_names(class_id)
        aliases = self.analyzer._get_property_aliases(class_id)

        # Heuristics for common properties (check first)
        heuristics = {
            "age": ["older", "younger", "years"],
            "created": ["created", "founded", "since"],
            "status": ["status", "state"],
            "email": ["email", "contacted"],
        }

        for prop, keywords in heuristics.items():
            if any(kw in query for kw in keywords):
                # Check if this property exists
                if prop in available_props:
                    return prop
                # Check if similar property exists
                matching_prop = next((p for p in available_props if prop in p), None)
                if matching_prop:
                    return matching_prop

        # Padding with spaces makes the containment test a whole-word match
        # for space-separated queries.
        padded = f" {query} "

        # Direct match
        for prop in available_props:
            if f" {prop} " in padded:
                return prop

        # Alias match
        for alias, canonical in aliases.items():
            if f" {alias} " in padded:
                return canonical

        return None

    def _extract_by_type(
        self,
        query: str,
        prop_type: "MetaPropertyType",
        property_name: str,  # noqa: ARG002
    ) -> Tuple[Optional["FilterOperator"], Optional[Any]]:
        """Extract operator and value based on property type.

        Returns ``(None, None)`` for categories without a rule (reference and
        structured properties) and when the matching rule finds nothing.
        """
        category = PropertyTypeCategory.categorize(prop_type.type)

        if category == PropertyTypeCategory.NUMERIC:
            return self._extract_numeric(query)
        if category == PropertyTypeCategory.TEMPORAL:
            return self._extract_temporal(query)
        if category == PropertyTypeCategory.BOOLEAN:
            return self._extract_boolean(query)
        if category == PropertyTypeCategory.TEXT:
            return self._extract_text(query)
        if category == PropertyTypeCategory.OPTIONS:
            return self._extract_options(query, prop_type)
        return None, None

    def _extract_numeric(
        self, query: str
    ) -> Tuple[Optional["FilterOperator"], Optional[float]]:
        """Extract numeric comparisons.

        Returns the operator and the number as ``int`` (no decimal point) or
        ``float``, or ``(None, None)`` when no comparison phrase is found.
        """
        # Inclusive phrases are tried first: "no more than 5" contains
        # "more than 5" and would otherwise be read as a strict ">".
        rules = (
            ("numeric_gte", FilterOperator.GTE),
            ("numeric_lte", FilterOperator.LTE),
            ("numeric_gt", FilterOperator.GT),
            ("numeric_lt", FilterOperator.LT),
        )
        for pattern_name, operator in rules:
            match = self.patterns[pattern_name].search(query)
            if match:
                literal = match.group(1)
                return operator, float(literal) if "." in literal else int(literal)

        return None, None

    def _extract_temporal(
        self, query: str
    ) -> Tuple[Optional["FilterOperator"], Optional[str]]:
        """Extract temporal comparisons; the date is returned as matched text."""
        # After
        match = self.patterns["date_after"].search(query)
        if match:
            return FilterOperator.GT, match.group(1)

        # Before
        match = self.patterns["date_before"].search(query)
        if match:
            return FilterOperator.LT, match.group(1)

        return None, None

    def _extract_boolean(
        self, query: str
    ) -> Tuple[Optional["FilterOperator"], Optional[bool]]:
        """Extract boolean values."""
        # Negative markers take precedence: "is disabled" and "is not active"
        # also contain the positive marker "is".
        if self.patterns["boolean_false"].search(query):
            return FilterOperator.EQ, False
        if self.patterns["boolean_true"].search(query):
            return FilterOperator.EQ, True
        return None, None

    def _extract_text(
        self, query: str
    ) -> Tuple[Optional["FilterOperator"], Optional[str]]:
        """Extract text patterns (prefix match, quoted phrase, bare "with X")."""
        # Starts with
        match = self.patterns["text_starts"].search(query)
        if match:
            return FilterOperator.STARTS_WITH, match.group(2)

        # Contains (with quotes)
        match = self.patterns["text_contains"].search(query)
        if match:
            return FilterOperator.CONTAINS, match.group(2)

        # Check for simple "with" patterns for text (John, etc)
        with_match = re.search(
            r'\bwith\s+(?:name\s+)?["\']?([a-z]+)["\']?', query, re.IGNORECASE
        )
        if with_match:
            return FilterOperator.CONTAINS, with_match.group(1)

        return None, None

    def _extract_options(
        self, query: str, prop_type: "MetaPropertyType"
    ) -> Tuple[Optional["FilterOperator"], Optional[Any]]:
        """Extract option values.

        Returns the first declared option whose text occurs in the query
        (case-insensitively), preferring longer options.
        """
        if not prop_type.options:
            return None, None

        query_lower = query.lower()
        # Longest first so that "inactive" is not reported as "active" when
        # both are options; the sort is stable, so ties keep declared order.
        by_length = sorted(
            prop_type.options, key=lambda option: len(str(option)), reverse=True
        )
        for option in by_length:
            if str(option).lower() in query_lower:
                return FilterOperator.EQ, option

        return None, None


class DynamicQueryParser:
    """
    Hybrid parser combining rule-based and LLM approaches.

    Strategy:
    1. Use rule-based extraction first (fast, reliable)
    2. If confidence is low and the rules flagged the query as needing an
       LLM, optionally use the LLM client
    3. Return results with confidence and rule trace
    """

    def __init__(
        self,
        store: "ElementStore",
        llm_client: Optional[Any] = None,
        llm_threshold: float = 0.5,
    ):
        """
        Initialize parser.

        Args:
            store: Element store with metanodes
            llm_client: Optional LLM client for fallback (e.g., Anthropic client)
            llm_threshold: Confidence threshold below which to use LLM
        """
        self.store = store
        self.logger = logging.getLogger(__name__)
        self.schema_analyzer = SchemaAnalyzer(store)
        self.rule_extractor = RuleBasedExtractor(self.schema_analyzer)
        self.llm_client = llm_client
        self.llm_threshold = llm_threshold

    def parse(self, query_string: str) -> "DynamicQuery":
        """
        Parse a natural language query.

        First tries rule-based extraction. The LLM path is taken only when
        the confidence is below the threshold, the rule extractor set
        ``requires_llm``, and an LLM client was supplied.

        Args:
            query_string: Human-readable query

        Returns:
            DynamicQuery ready to execute. When no class can be detected the
            query wraps an empty extraction.
        """
        self.logger.info(f"Parsing query: {query_string}")

        # Step 1: Detect class ID
        class_id = self._extract_class(query_string)
        if not class_id:
            self.logger.warning(f"Could not detect class ID in query: {query_string}")
            return DynamicQuery(self.store, DynamicExtraction())

        # Step 2: Rule-based extraction
        extraction = self.rule_extractor.extract(query_string, class_id)
        self.logger.debug(
            f"Rule extraction: confidence={extraction.confidence:.2f}, rules={extraction.applied_rules}"
        )

        # Step 3: LLM fallback if needed and available
        if (
            extraction.confidence < self.llm_threshold
            and extraction.requires_llm
            and self.llm_client
        ):
            self.logger.info(
                f"Confidence {extraction.confidence:.2f} below threshold {self.llm_threshold:.2f}, using LLM"
            )
            extraction = self._llm_extract(query_string, class_id, extraction)

        return DynamicQuery(self.store, extraction)

    def _extract_class(self, query: str) -> Optional[str]:
        """Extract class ID from query.

        Candidate classes are the distinct string ``class_id`` values of the
        store's elements. Matching is case-insensitive, and the ID is
        returned exactly as stored so that later lookups against the store
        compare equal. Longer IDs are preferred, which makes the result
        deterministic when several IDs occur in the query.

        Returns:
            The matched class ID, or None when no class is mentioned.
        """
        # lower-cased id -> id as stored (first spelling seen wins)
        stored_by_lower: Dict[str, str] = {}
        for element in self.store.all_elements():
            class_id = getattr(element, "class_id", None)
            if isinstance(class_id, str) and class_id:
                stored_by_lower.setdefault(class_id.lower(), class_id)

        query_lower = query.lower()
        # "meta" is the class of schema nodes, not a user-facing class.
        candidates = sorted(
            (key for key in stored_by_lower if key != "meta"),
            key=lambda key: (-len(key), key),
        )

        # Pass 1: the id as a standalone word, optionally pluralised.
        for key in candidates:
            whole_word = rf"(?<!\w){re.escape(key)}(?:s|es)?(?!\w)"
            if re.search(whole_word, query_lower):
                return stored_by_lower[key]

        # Pass 2: the id anywhere inside the query (e.g. inside a compound).
        for key in candidates:
            if key in query_lower:
                return stored_by_lower[key]

        # Fallback: a query word, with trailing "s" removed, that equals a
        # class id. After the passes above this can only still match "meta".
        for word in query_lower.split():
            singular = word.rstrip("s")
            if singular in stored_by_lower:
                return stored_by_lower[singular]

        return None

    def _llm_extract(
        self,
        query: str,
        class_id: str,  # noqa: ARG002
        partial: "DynamicExtraction",
    ) -> "DynamicExtraction":
        """Use LLM to enhance or refine extraction.

        Placeholder: no LLM call is made yet. The partial extraction is
        returned with ``requires_llm`` cleared to mark the attempt.
        """
        # This would call Claude API to parse the query
        # For now, just return the partial extraction with note
        self.logger.info(f"LLM extraction would be used here for query: {query}")
        partial.requires_llm = False  # Mark as attempted
        return partial


class DynamicQuery:
    """Represents parsed query ready for execution.

    Wraps a ``DynamicExtraction`` and translates it into a ``GraphQuery``
    chain on demand.
    """

    def __init__(self, store: "ElementStore", extraction: "DynamicExtraction"):
        """Initialize with extraction result."""
        self.store = store
        self.extraction = extraction
        self.logger = logging.getLogger(__name__)

    def _has_filter(self) -> bool:
        """Return True when property, operator and value are all present."""
        ext = self.extraction
        # The value is compared to None explicitly so that 0 and False
        # remain usable filter values.
        return bool(ext.property_name and ext.operator and ext.value is not None)

    def to_graph_query(self) -> "GraphQuery":
        """Convert to GraphQuery for execution.

        Only the components present in the extraction are applied, in the
        order class, property filter, text search, sort, limit.
        """
        ext = self.extraction
        query = GraphQuery(self.store)

        if ext.class_id:
            query = query.classId(ext.class_id)

        if self._has_filter():
            query = query.where(ext.property_name, ext.operator, ext.value)

        if ext.text_search:
            query = query.text(ext.text_search)

        if ext.sort_property:
            query = query.sort(ext.sort_property, ext.sort_direction)

        if ext.limit:
            query = query.first(ext.limit)

        return query

    def execute(self) -> List[Any]:
        """Execute query and return results."""
        return self.to_graph_query().r()

    def count(self) -> int:
        """Count matching results."""
        return self.to_graph_query().count()

    def __repr__(self) -> str:
        """Return the equivalent call chain followed by a confidence note.

        The chain starts with ``classId(...)`` when a class was detected and
        with ``GraphQuery()`` otherwise, so it never begins with a bare
        ``.method(...)``.
        """
        ext = self.extraction
        parts = [f"classId('{ext.class_id}')" if ext.class_id else "GraphQuery()"]

        if self._has_filter():
            parts.append(f".where('{ext.property_name}', {ext.operator}, {ext.value})")

        if ext.text_search:
            parts.append(f".text('{ext.text_search}')")

        if ext.sort_property:
            parts.append(f".sort('{ext.sort_property}', '{ext.sort_direction}')")

        if ext.limit:
            parts.append(f".first({ext.limit})")

        # Add confidence info
        confidence_pct = ext.confidence * 100
        parts.append(
            f"  # Confidence: {confidence_pct:.0f}%, Rules: {', '.join(ext.applied_rules)}"
        )

        return "".join(parts)
