"""SQLite storage backend implementation.

This module implements the StorageBackend interface using a SQLite database.
"""

import logging
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Optional

from .base import (
    StorageBackend,
    ProjectRecord,
    FileRecord,
    ImportRecord,
    FunctionRecord,
    FunctionCallRecord,
    CodeSummaryRecord,
    FileTypeStats,
    DepthStats,
    ReferenceRankingItem,
)

logger = logging.getLogger(__name__)


class SQLiteStorage(StorageBackend):
    """SQLite storage backend implementation."""

    # DDL script run by _init_schema() through executescript(). Every statement
    # uses IF NOT EXISTS, so re-running it leaves existing tables and indexes
    # untouched. The ON DELETE CASCADE / SET NULL rules declared here are only
    # enforced on connections that turn on PRAGMA foreign_keys.
    SCHEMA_SQL = """
    -- Projects table
    CREATE TABLE IF NOT EXISTS projects (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE NOT NULL,
        name TEXT NOT NULL,
        last_scanned TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        file_count INTEGER DEFAULT 0
    );

    -- Files table
    CREATE TABLE IF NOT EXISTS files (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        project_id INTEGER NOT NULL,
        relative_path TEXT NOT NULL,
        file_type TEXT NOT NULL,
        size INTEGER DEFAULT 0,
        depth INTEGER DEFAULT 0,
        modified_time TIMESTAMP,
        FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
        UNIQUE(project_id, relative_path)
    );

    -- Imports table
    CREATE TABLE IF NOT EXISTS imports (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL,
        module TEXT NOT NULL,
        import_type TEXT DEFAULT 'static',
        line INTEGER,
        resolved_file_id INTEGER,
        FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
        FOREIGN KEY (resolved_file_id) REFERENCES files(id) ON DELETE SET NULL
    );

    -- Functions table
    CREATE TABLE IF NOT EXISTS functions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL,
        name TEXT NOT NULL,
        signature TEXT,
        start_line INTEGER,
        end_line INTEGER,
        FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
    );

    -- Function calls table
    CREATE TABLE IF NOT EXISTS function_calls (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        caller_function_id INTEGER NOT NULL,
        callee_name TEXT NOT NULL,
        callee_function_id INTEGER,
        line INTEGER,
        FOREIGN KEY (caller_function_id) REFERENCES functions(id) ON DELETE CASCADE,
        FOREIGN KEY (callee_function_id) REFERENCES functions(id) ON DELETE SET NULL
    );

    -- Code summaries table (LLM-generated)
    CREATE TABLE IF NOT EXISTS code_summaries (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL,
        entity_type TEXT NOT NULL,
        entity_name TEXT NOT NULL,
        signature TEXT NOT NULL,
        summary TEXT NOT NULL,
        summary_en TEXT,
        summary_zh TEXT,
        embedding BLOB,
        line_number INTEGER,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
        UNIQUE(file_id, entity_type, entity_name)
    );

    -- Indexes
    CREATE INDEX IF NOT EXISTS idx_files_project ON files(project_id);
    CREATE INDEX IF NOT EXISTS idx_files_type ON files(file_type);
    CREATE INDEX IF NOT EXISTS idx_files_depth ON files(depth);
    CREATE INDEX IF NOT EXISTS idx_imports_file ON imports(file_id);
    CREATE INDEX IF NOT EXISTS idx_imports_resolved ON imports(resolved_file_id);
    CREATE INDEX IF NOT EXISTS idx_functions_file ON functions(file_id);
    CREATE INDEX IF NOT EXISTS idx_function_calls_caller ON function_calls(caller_function_id);
    CREATE INDEX IF NOT EXISTS idx_summaries_file ON code_summaries(file_id);
    CREATE INDEX IF NOT EXISTS idx_summaries_entity ON code_summaries(entity_name);
    """

    def __init__(self, db_path: str = "code_knowledge.db"):
        """Initialize SQLite storage.

        Args:
            db_path: Path to the SQLite database file
        """
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        self.conn.execute("PRAGMA foreign_keys = ON")
        self._init_schema()

    def _init_schema(self) -> None:
        """Initialize database schema."""
        cursor = self.conn.cursor()
        cursor.executescript(self.SCHEMA_SQL)
        self.conn.commit()
        self._migrate_schema()

    def _migrate_schema(self) -> None:
        """Apply schema migrations for compatibility."""
        cursor = self._get_cursor()

        # Check if code_summaries table needs migration
        cursor.execute("PRAGMA table_info(code_summaries)")
        columns = {col[1] for col in cursor.fetchall()}

        # Add new columns if they don't exist
        migrations = [
            ("summary_en", "ALTER TABLE code_summaries ADD COLUMN summary_en TEXT"),
            ("summary_zh", "ALTER TABLE code_summaries ADD COLUMN summary_zh TEXT"),
            ("embedding", "ALTER TABLE code_summaries ADD COLUMN embedding BLOB"),
        ]

        for col_name, migration_sql in migrations:
            if col_name not in columns:
                try:
                    cursor.execute(migration_sql)
                    logger.info(f"Migrated code_summaries: added {col_name} column")
                except sqlite3.Error as e:
                    logger.warning(f"Migration failed for {col_name}: {e}")

        self.conn.commit()

    def _get_cursor(self) -> sqlite3.Cursor:
        """Get a database cursor."""
        return self.conn.cursor()

    def save_project(
        self,
        path: Path,
        files: list,
        graph: dict
    ) -> int:
        """Save project data to storage."""
        cursor = self._get_cursor()
        path_str = str(path.resolve())
        project_name = path.name

        # Check if project exists
        existing = self.get_project(path_str)
        if existing:
            project_id = existing.id
            # Delete existing files (cascade deletes imports, functions, etc.)
            cursor.execute("DELETE FROM files WHERE project_id = ?", (project_id,))
            # Update project record
            cursor.execute(
                """
                UPDATE projects
                SET last_scanned = CURRENT_TIMESTAMP, file_count = ?
                WHERE id = ?
                """,
                (len(files), project_id)
            )
        else:
            # Insert new project
            cursor.execute(
                """
                INSERT INTO projects (path, name, file_count)
                VALUES (?, ?, ?)
                """,
                (path_str, project_name, len(files))
            )
            project_id = cursor.lastrowid

        # Build path to file_id mapping for import resolution
        path_to_file_id: dict[str, int] = {}

        # Insert files
        for file_info in files:
            rel_path = file_info.relative_path
            depth = len(Path(rel_path).parts) - 1  # Depth from root

            try:
                modified_time = datetime.fromtimestamp(
                    Path(path / rel_path).stat().st_mtime
                )
            except (OSError, FileNotFoundError):
                modified_time = datetime.now()

            cursor.execute(
                """
                INSERT INTO files (project_id, relative_path, file_type, size, depth, modified_time)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    project_id,
                    rel_path,
                    file_info.file_type,
                    file_info.size,
                    depth,
                    modified_time.isoformat()
                )
            )
            file_id = cursor.lastrowid
            path_to_file_id[rel_path] = file_id

        # Build edge list per source file for import resolution
        # Note: Edge.to_dict() uses "from" and "to" keys
        # Edges are in the same order as imports in the source file
        source_edges: dict[str, list[str]] = {}
        if "edges" in graph:
            for edge in graph["edges"]:
                source = edge.get("from", "")
                target = edge.get("to", "")
                if source not in source_edges:
                    source_edges[source] = []
                source_edges[source].append(target)

        # Insert imports
        for file_info in files:
            file_id = path_to_file_id.get(file_info.relative_path)
            if not file_id:
                continue

            source_path = file_info.relative_path
            edges = source_edges.get(source_path, [])

            for idx, imp in enumerate(file_info.imports):
                # Resolve import to file_id if it's internal
                resolved_file_id = None

                # Match by index - edges are in same order as imports
                if idx < len(edges):
                    target = edges[idx]
                    # Skip external dependencies
                    if not target.startswith("external:") and target in path_to_file_id:
                        resolved_file_id = path_to_file_id[target]

                cursor.execute(
                    """
                    INSERT INTO imports (file_id, module, import_type, line, resolved_file_id)
                    VALUES (?, ?, ?, ?, ?)
                    """,
                    (
                        file_id,
                        imp.module,
                        imp.import_type,
                        imp.line,
                        resolved_file_id
                    )
                )

        self.conn.commit()
        logger.info(f"Saved project {project_name} with {len(files)} files")
        return project_id

    def save_project_incremental(
        self,
        path: Path,
        files: list,
        graph: dict
    ) -> tuple[int, dict]:
        """Save project data with incremental updates based on file modification time.

        Only updates files that have been modified since the last scan.
        Returns a tuple of (project_id, update_stats).

        Args:
            path: Project root path
            files: List of FileInfo objects from scanner
            graph: Dependency graph data

        Returns:
            Tuple of (project_id, stats_dict) where stats_dict contains:
                - added: number of new files
                - updated: number of updated files
                - unchanged: number of unchanged files
                - removed: number of removed files
        """
        cursor = self._get_cursor()
        path_str = str(path.resolve())
        project_name = path.name

        stats = {"added": 0, "updated": 0, "unchanged": 0, "removed": 0}

        # Check if project exists
        existing = self.get_project(path_str)
        if not existing:
            # First scan - use regular save
            project_id = self.save_project(path, files, graph)
            stats["added"] = len(files)
            return project_id, stats

        project_id = existing.id

        # Get existing files and their modification times
        existing_files = {}
        cursor.execute(
            """
            SELECT relative_path, modified_time, id
            FROM files WHERE project_id = ?
            """,
            (project_id,)
        )
        for row in cursor.fetchall():
            existing_files[row["relative_path"]] = {
                "modified_time": row["modified_time"],
                "id": row["id"]
            }

        # Categorize files
        new_files = []
        modified_files = []
        unchanged_files = []
        current_paths = set()

        for file_info in files:
            rel_path = file_info.relative_path
            current_paths.add(rel_path)

            if rel_path not in existing_files:
                new_files.append(file_info)
            else:
                # Check modification time
                try:
                    current_mtime = datetime.fromtimestamp(
                        Path(path / rel_path).stat().st_mtime
                    )
                except (OSError, FileNotFoundError):
                    current_mtime = datetime.now()

                stored_mtime = existing_files[rel_path]["modified_time"]
                if isinstance(stored_mtime, str):
                    stored_mtime = datetime.fromisoformat(stored_mtime)

                # Compare timestamps (with 1 second tolerance)
                if abs((current_mtime - stored_mtime).total_seconds()) > 1:
                    modified_files.append(file_info)
                else:
                    unchanged_files.append(file_info)

        # Find removed files
        removed_paths = set(existing_files.keys()) - current_paths

        # Delete removed files
        for removed_path in removed_paths:
            file_id = existing_files[removed_path]["id"]
            cursor.execute("DELETE FROM files WHERE id = ?", (file_id,))
            stats["removed"] += 1

        # Delete modified files (will be re-inserted)
        for file_info in modified_files:
            file_id = existing_files[file_info.relative_path]["id"]
            cursor.execute("DELETE FROM files WHERE id = ?", (file_id,))

        # Build path to file_id mapping (include unchanged files)
        path_to_file_id: dict[str, int] = {}
        for file_info in unchanged_files:
            path_to_file_id[file_info.relative_path] = (
                existing_files[file_info.relative_path]["id"]
            )

        # Insert new and modified files
        files_to_insert = new_files + modified_files
        for file_info in files_to_insert:
            rel_path = file_info.relative_path
            depth = len(Path(rel_path).parts) - 1

            try:
                modified_time = datetime.fromtimestamp(
                    Path(path / rel_path).stat().st_mtime
                )
            except (OSError, FileNotFoundError):
                modified_time = datetime.now()

            cursor.execute(
                """
                INSERT INTO files (project_id, relative_path, file_type, size, depth, modified_time)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    project_id,
                    rel_path,
                    file_info.file_type,
                    file_info.size,
                    depth,
                    modified_time.isoformat()
                )
            )
            file_id = cursor.lastrowid
            path_to_file_id[rel_path] = file_id

        # Build edge list per source file for import resolution
        # Note: Edge.to_dict() uses "from" and "to" keys
        # Edges are in the same order as imports in the source file
        source_edges: dict[str, list[str]] = {}
        if "edges" in graph:
            for edge in graph["edges"]:
                source = edge.get("from", "")
                target = edge.get("to", "")
                if source not in source_edges:
                    source_edges[source] = []
                source_edges[source].append(target)

        # Insert imports for new and modified files
        for file_info in files_to_insert:
            file_id = path_to_file_id.get(file_info.relative_path)
            if not file_id:
                continue

            source_path = file_info.relative_path
            edges = source_edges.get(source_path, [])

            for idx, imp in enumerate(file_info.imports):
                resolved_file_id = None

                # Match by index - edges are in same order as imports
                if idx < len(edges):
                    target = edges[idx]
                    # Skip external dependencies
                    if not target.startswith("external:") and target in path_to_file_id:
                        resolved_file_id = path_to_file_id[target]

                cursor.execute(
                    """
                    INSERT INTO imports (file_id, module, import_type, line, resolved_file_id)
                    VALUES (?, ?, ?, ?, ?)
                    """,
                    (
                        file_id,
                        imp.module,
                        imp.import_type,
                        imp.line,
                        resolved_file_id
                    )
                )

        # Update project record
        cursor.execute(
            """
            UPDATE projects
            SET last_scanned = CURRENT_TIMESTAMP, file_count = ?
            WHERE id = ?
            """,
            (len(files), project_id)
        )

        self.conn.commit()

        stats["added"] = len(new_files)
        stats["updated"] = len(modified_files)
        stats["unchanged"] = len(unchanged_files)

        logger.info(
            f"Incremental update for {project_name}: "
            f"{stats['added']} added, {stats['updated']} updated, "
            f"{stats['unchanged']} unchanged, {stats['removed']} removed"
        )
        return project_id, stats

    def get_files_needing_update(
        self,
        project_path: Path,
        file_paths: list[str]
    ) -> list[str]:
        """Get list of files that need updating based on modification time.

        Args:
            project_path: Project root path
            file_paths: List of relative file paths to check

        Returns:
            List of relative paths that need updating
        """
        project = self.get_project(str(project_path.resolve()))
        if not project:
            return file_paths  # All files need update if project doesn't exist

        cursor = self._get_cursor()
        needs_update = []

        for rel_path in file_paths:
            cursor.execute(
                """
                SELECT modified_time FROM files
                WHERE project_id = ? AND relative_path = ?
                """,
                (project.id, rel_path)
            )
            row = cursor.fetchone()

            if not row:
                needs_update.append(rel_path)
                continue

            try:
                current_mtime = datetime.fromtimestamp(
                    Path(project_path / rel_path).stat().st_mtime
                )
            except (OSError, FileNotFoundError):
                needs_update.append(rel_path)
                continue

            stored_mtime = row["modified_time"]
            if isinstance(stored_mtime, str):
                stored_mtime = datetime.fromisoformat(stored_mtime)

            if abs((current_mtime - stored_mtime).total_seconds()) > 1:
                needs_update.append(rel_path)

        return needs_update

    def _module_matches_path(self, module: str, path: str) -> bool:
        """Check if a module name could match a file path."""
        # Normalize module to path-like format
        module_path = module.replace(".", "/")
        path_without_ext = Path(path).with_suffix("").as_posix()

        return (
            path_without_ext.endswith(module_path) or
            module_path in path_without_ext or
            module in path
        )

    def get_project(self, path: str) -> Optional[ProjectRecord]:
        """Get project record by path."""
        cursor = self._get_cursor()
        cursor.execute(
            "SELECT * FROM projects WHERE path = ?",
            (path,)
        )
        row = cursor.fetchone()
        if row:
            return ProjectRecord(
                id=row["id"],
                path=row["path"],
                name=row["name"],
                last_scanned=row["last_scanned"],
                file_count=row["file_count"]
            )
        return None

    def get_project_by_id(self, project_id: int) -> Optional[ProjectRecord]:
        """Get project record by ID."""
        cursor = self._get_cursor()
        cursor.execute(
            "SELECT * FROM projects WHERE id = ?",
            (project_id,)
        )
        row = cursor.fetchone()
        if row:
            return ProjectRecord(
                id=row["id"],
                path=row["path"],
                name=row["name"],
                last_scanned=row["last_scanned"],
                file_count=row["file_count"]
            )
        return None

    def get_files_by_project(
        self,
        project_id: int,
        file_type: Optional[str] = None,
        subdirectory: Optional[str] = None
    ) -> list[FileRecord]:
        """Get all files for a project."""
        cursor = self._get_cursor()

        query = "SELECT * FROM files WHERE project_id = ?"
        params: list = [project_id]

        if file_type:
            query += " AND file_type = ?"
            params.append(file_type)

        if subdirectory:
            # Filter by subdirectory prefix
            query += " AND relative_path LIKE ?"
            params.append(f"{subdirectory}%")

        cursor.execute(query, params)
        rows = cursor.fetchall()

        return [
            FileRecord(
                id=row["id"],
                project_id=row["project_id"],
                relative_path=row["relative_path"],
                file_type=row["file_type"],
                size=row["size"],
                depth=row["depth"],
                modified_time=row["modified_time"]
            )
            for row in rows
        ]

    def get_file_by_path(
        self,
        project_id: int,
        relative_path: str
    ) -> Optional[FileRecord]:
        """Get a file record by its relative path."""
        cursor = self._get_cursor()
        cursor.execute(
            """
            SELECT * FROM files
            WHERE project_id = ? AND relative_path = ?
            """,
            (project_id, relative_path)
        )
        row = cursor.fetchone()
        if row:
            return FileRecord(
                id=row["id"],
                project_id=row["project_id"],
                relative_path=row["relative_path"],
                file_type=row["file_type"],
                size=row["size"],
                depth=row["depth"],
                modified_time=row["modified_time"]
            )
        return None

    def get_imports_by_file(self, file_id: int) -> list[ImportRecord]:
        """Get all imports for a file."""
        cursor = self._get_cursor()
        cursor.execute(
            "SELECT * FROM imports WHERE file_id = ?",
            (file_id,)
        )
        rows = cursor.fetchall()

        return [
            ImportRecord(
                id=row["id"],
                file_id=row["file_id"],
                module=row["module"],
                import_type=row["import_type"],
                line=row["line"],
                resolved_file_id=row["resolved_file_id"]
            )
            for row in rows
        ]

    def get_file_stats(
        self,
        project_id: int,
        subdirectory: Optional[str] = None
    ) -> list[FileTypeStats]:
        """Get file type statistics for a project."""
        cursor = self._get_cursor()

        if subdirectory:
            cursor.execute(
                """
                SELECT file_type, COUNT(*) as count, SUM(size) as total_size
                FROM files
                WHERE project_id = ? AND relative_path LIKE ?
                GROUP BY file_type
                ORDER BY count DESC
                """,
                (project_id, f"{subdirectory}%")
            )
        else:
            cursor.execute(
                """
                SELECT file_type, COUNT(*) as count, SUM(size) as total_size
                FROM files
                WHERE project_id = ?
                GROUP BY file_type
                ORDER BY count DESC
                """,
                (project_id,)
            )

        rows = cursor.fetchall()

        # Calculate total for percentage
        total_count = sum(row["count"] for row in rows)
        if total_count == 0:
            return []

        return [
            FileTypeStats(
                file_type=row["file_type"],
                count=row["count"],
                percentage=round(row["count"] / total_count * 100, 2),
                total_size=row["total_size"] or 0
            )
            for row in rows
        ]

    def get_reference_ranking(
        self,
        project_id: int,
        limit: int = 20,
        file_type: Optional[str] = None
    ) -> list[ReferenceRankingItem]:
        """Get files ranked by incoming reference count."""
        cursor = self._get_cursor()

        # Query to count references and get referencing files
        if file_type:
            cursor.execute(
                """
                SELECT
                    f.relative_path,
                    COUNT(i.id) as ref_count,
                    GROUP_CONCAT(DISTINCT sf.relative_path) as referencing_files
                FROM files f
                LEFT JOIN imports i ON i.resolved_file_id = f.id
                LEFT JOIN files sf ON i.file_id = sf.id
                WHERE f.project_id = ?
                    AND f.file_type = ?
                    AND i.resolved_file_id IS NOT NULL
                GROUP BY f.id
                HAVING ref_count > 0
                ORDER BY ref_count DESC
                LIMIT ?
                """,
                (project_id, file_type, limit)
            )
        else:
            cursor.execute(
                """
                SELECT
                    f.relative_path,
                    COUNT(i.id) as ref_count,
                    GROUP_CONCAT(DISTINCT sf.relative_path) as referencing_files
                FROM files f
                LEFT JOIN imports i ON i.resolved_file_id = f.id
                LEFT JOIN files sf ON i.file_id = sf.id
                WHERE f.project_id = ?
                    AND i.resolved_file_id IS NOT NULL
                GROUP BY f.id
                HAVING ref_count > 0
                ORDER BY ref_count DESC
                LIMIT ?
                """,
                (project_id, limit)
            )

        rows = cursor.fetchall()

        return [
            ReferenceRankingItem(
                file_path=row["relative_path"],
                reference_count=row["ref_count"],
                referencing_files=(
                    row["referencing_files"].split(",")
                    if row["referencing_files"]
                    else []
                )
            )
            for row in rows
        ]

    def get_depth_stats(
        self,
        project_id: int,
        subdirectory: Optional[str] = None
    ) -> DepthStats:
        """Get directory and file depth statistics."""
        cursor = self._get_cursor()

        if subdirectory:
            # Adjust depth relative to subdirectory
            subdir_depth = len(Path(subdirectory).parts)
            cursor.execute(
                """
                SELECT
                    MIN(depth - ?) as min_depth,
                    MAX(depth - ?) as max_depth,
                    AVG(depth - ?) as avg_depth,
                    depth - ? as adj_depth,
                    COUNT(*) as count
                FROM files
                WHERE project_id = ? AND relative_path LIKE ?
                GROUP BY adj_depth
                """,
                (subdir_depth, subdir_depth, subdir_depth, subdir_depth,
                 project_id, f"{subdirectory}%")
            )
        else:
            cursor.execute(
                """
                SELECT depth, COUNT(*) as count
                FROM files
                WHERE project_id = ?
                GROUP BY depth
                """,
                (project_id,)
            )

        rows = cursor.fetchall()

        if not rows:
            return DepthStats(
                max_directory_depth=0,
                max_file_depth=0,
                min_file_depth=0,
                avg_file_depth=0.0,
                depth_distribution={}
            )

        # Build depth distribution
        depth_distribution = {}
        total_depth = 0
        total_count = 0
        min_depth = float("inf")
        max_depth = 0

        for row in rows:
            depth = row["depth"] if "depth" in row.keys() else row["adj_depth"]
            count = row["count"]
            depth_distribution[depth] = count
            total_depth += depth * count
            total_count += count
            min_depth = min(min_depth, depth)
            max_depth = max(max_depth, depth)

        avg_depth = total_depth / total_count if total_count > 0 else 0

        return DepthStats(
            max_directory_depth=max_depth,
            max_file_depth=max_depth,
            min_file_depth=int(min_depth) if min_depth != float("inf") else 0,
            avg_file_depth=round(avg_depth, 2),
            depth_distribution=depth_distribution
        )

    def get_file_imports(
        self,
        project_id: int,
        file_path: str
    ) -> list[str]:
        """Get files that the target file imports (outgoing dependencies)."""
        cursor = self._get_cursor()
        cursor.execute(
            """
            SELECT DISTINCT rf.relative_path
            FROM files f
            JOIN imports i ON i.file_id = f.id
            JOIN files rf ON i.resolved_file_id = rf.id
            WHERE f.project_id = ? AND f.relative_path = ?
            """,
            (project_id, file_path)
        )
        return [row["relative_path"] for row in cursor.fetchall()]

    def get_file_importers(
        self,
        project_id: int,
        file_path: str
    ) -> list[str]:
        """Get files that import the target file (incoming dependencies)."""
        cursor = self._get_cursor()
        cursor.execute(
            """
            SELECT DISTINCT sf.relative_path
            FROM files f
            JOIN imports i ON i.resolved_file_id = f.id
            JOIN files sf ON i.file_id = sf.id
            WHERE f.project_id = ? AND f.relative_path = ?
            """,
            (project_id, file_path)
        )
        return [row["relative_path"] for row in cursor.fetchall()]

    def save_functions(
        self,
        file_id: int,
        functions: list[dict]
    ) -> list[int]:
        """Save function records for a file."""
        cursor = self._get_cursor()
        function_ids = []

        for func in functions:
            cursor.execute(
                """
                INSERT INTO functions (file_id, name, signature, start_line, end_line)
                VALUES (?, ?, ?, ?, ?)
                """,
                (
                    file_id,
                    func["name"],
                    func.get("signature", ""),
                    func.get("start_line", 0),
                    func.get("end_line", 0)
                )
            )
            function_ids.append(cursor.lastrowid)

        self.conn.commit()
        return function_ids

    def get_functions_by_file(self, file_id: int) -> list[FunctionRecord]:
        """Get all functions for a file."""
        cursor = self._get_cursor()
        cursor.execute(
            "SELECT * FROM functions WHERE file_id = ?",
            (file_id,)
        )
        rows = cursor.fetchall()

        return [
            FunctionRecord(
                id=row["id"],
                file_id=row["file_id"],
                name=row["name"],
                signature=row["signature"] or "",
                start_line=row["start_line"],
                end_line=row["end_line"]
            )
            for row in rows
        ]

    def save_function_calls(
        self,
        caller_function_id: int,
        calls: list[dict]
    ) -> list[int]:
        """Insert the calls made by one function as an all-or-nothing batch.

        Args:
            caller_function_id: ID of the function that makes the calls.
            calls: One dict per call. ``callee_name`` is required;
                ``callee_function_id`` (default ``None``) and ``line``
                (default ``0``) are optional.

        Returns:
            Row IDs of the inserted call records, in input order.

        Raises:
            KeyError: If a dict has no ``callee_name`` key. Raised before
                any row is written.
            sqlite3.Error: If an insert fails. The pending transaction is
                rolled back before the error is re-raised.
        """
        # Marshal every row up front so malformed input fails before any
        # write, instead of leaving part of the batch pending.
        rows = [
            (
                caller_function_id,
                call["callee_name"],
                call.get("callee_function_id"),
                call.get("line", 0),
            )
            for call in calls
        ]

        cursor = self._get_cursor()
        call_ids = []

        try:
            for params in rows:
                cursor.execute(
                    """
                    INSERT INTO function_calls
                    (caller_function_id, callee_name, callee_function_id, line)
                    VALUES (?, ?, ?, ?)
                    """,
                    params
                )
                call_ids.append(cursor.lastrowid)
        except sqlite3.Error:
            # Drop the partial batch; otherwise a later commit on this
            # connection would silently persist it.
            self.conn.rollback()
            raise

        self.conn.commit()
        return call_ids

    def get_function_calls(
        self,
        function_id: int
    ) -> list[FunctionCallRecord]:
        """Get all function calls made by a function."""
        cursor = self._get_cursor()
        cursor.execute(
            "SELECT * FROM function_calls WHERE caller_function_id = ?",
            (function_id,)
        )
        rows = cursor.fetchall()

        return [
            FunctionCallRecord(
                id=row["id"],
                caller_function_id=row["caller_function_id"],
                callee_name=row["callee_name"],
                callee_function_id=row["callee_function_id"],
                line=row["line"]
            )
            for row in rows
        ]

    def save_summaries(
        self,
        summaries: list[dict]
    ) -> list[int]:
        """Save code summaries with bilingual support and embeddings.

        Each summary is written with ``INSERT OR REPLACE``. A row that fails
        with a database error is logged and skipped, and the remaining rows
        are still saved.

        Args:
            summaries: One dict per summary. ``file_id``, ``entity_type``,
                ``entity_name``, ``signature`` and ``summary`` are required.
                ``summary_en`` / ``summary_zh`` (default ``""``),
                ``embedding`` (stored as UTF-8 encoded JSON; a missing or
                empty value is stored as NULL) and ``line_number``
                (default ``0``) are optional.

        Returns:
            Row IDs of the summaries that were written, in input order.
            Skipped rows contribute no entry, so the list may be shorter
            than ``summaries``.

        Raises:
            KeyError: If a dict lacks a required key. Raised before any row
                is written.
            TypeError: If an embedding cannot be serialised to JSON. Raised
                before any row is written.
        """
        import json

        # Marshal everything before the first write: a malformed dict or an
        # unserialisable embedding then fails before any row is inserted,
        # rather than leaving part of the batch pending and uncommitted.
        rows = []
        for summary in summaries:
            embedding_blob = None
            if summary.get("embedding"):
                embedding_blob = json.dumps(summary["embedding"]).encode("utf-8")
            rows.append(
                (
                    summary["file_id"],
                    summary["entity_type"],
                    summary["entity_name"],
                    summary["signature"],
                    summary["summary"],
                    summary.get("summary_en", ""),
                    summary.get("summary_zh", ""),
                    embedding_blob,
                    summary.get("line_number", 0),
                )
            )

        cursor = self._get_cursor()
        summary_ids = []

        for params in rows:
            try:
                cursor.execute(
                    """
                    INSERT OR REPLACE INTO code_summaries
                    (file_id, entity_type, entity_name, signature, summary,
                     summary_en, summary_zh, embedding, line_number)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    params
                )
            except sqlite3.Error as e:
                # Best effort per row: report the failure and keep going.
                logger.error("Failed to save summary: %s", e)
                continue
            summary_ids.append(cursor.lastrowid)

        self.conn.commit()
        return summary_ids

    def get_summary(
        self,
        file_id: int,
        entity_name: str
    ) -> Optional[CodeSummaryRecord]:
        """Get code summary for an entity."""
        cursor = self._get_cursor()
        cursor.execute(
            """
            SELECT * FROM code_summaries
            WHERE file_id = ? AND entity_name = ?
            """,
            (file_id, entity_name)
        )
        row = cursor.fetchone()
        if row:
            # Deserialize embedding if present
            embedding = None
            if row["embedding"]:
                import json
                try:
                    embedding = json.loads(row["embedding"].decode("utf-8"))
                except (json.JSONDecodeError, AttributeError):
                    pass

            return CodeSummaryRecord(
                id=row["id"],
                file_id=row["file_id"],
                entity_type=row["entity_type"],
                entity_name=row["entity_name"],
                signature=row["signature"],
                summary=row["summary"],
                line_number=row["line_number"],
                summary_en=row["summary_en"] or "",
                summary_zh=row["summary_zh"] or "",
                embedding=embedding,
                created_at=row["created_at"]
            )
        return None

    def get_summaries_by_file(
        self,
        file_id: int
    ) -> list[CodeSummaryRecord]:
        """Get all summaries for a file."""
        cursor = self._get_cursor()
        cursor.execute(
            "SELECT * FROM code_summaries WHERE file_id = ?",
            (file_id,)
        )
        rows = cursor.fetchall()

        results = []
        for row in rows:
            # Deserialize embedding if present
            embedding = None
            if row["embedding"]:
                import json
                try:
                    embedding = json.loads(row["embedding"].decode("utf-8"))
                except (json.JSONDecodeError, AttributeError):
                    pass

            results.append(CodeSummaryRecord(
                id=row["id"],
                file_id=row["file_id"],
                entity_type=row["entity_type"],
                entity_name=row["entity_name"],
                signature=row["signature"],
                summary=row["summary"],
                line_number=row["line_number"],
                summary_en=row["summary_en"] or "",
                summary_zh=row["summary_zh"] or "",
                embedding=embedding,
                created_at=row["created_at"]
            ))
        return results

    def search_by_embedding(
        self,
        project_id: int,
        query_embedding: list[float],
        limit: int = 10
    ) -> list[CodeSummaryRecord]:
        """Search for similar code summaries by embedding (cosine similarity).

        Note: This is a simple implementation. For production use with large
        datasets, consider using a dedicated vector database.

        Args:
            project_id: Project ID
            query_embedding: Query vector
            limit: Maximum results to return

        Returns:
            List of similar code summaries sorted by similarity
        """
        import json
        import math

        cursor = self._get_cursor()

        # Get all summaries with embeddings for this project
        cursor.execute(
            """
            SELECT cs.*, f.relative_path
            FROM code_summaries cs
            JOIN files f ON cs.file_id = f.id
            WHERE f.project_id = ? AND cs.embedding IS NOT NULL
            """,
            (project_id,)
        )
        rows = cursor.fetchall()

        def cosine_similarity(a: list[float], b: list[float]) -> float:
            """Calculate cosine similarity between two vectors."""
            if not a or not b or len(a) != len(b):
                return 0.0
            dot_product = sum(x * y for x, y in zip(a, b))
            norm_a = math.sqrt(sum(x * x for x in a))
            norm_b = math.sqrt(sum(x * x for x in b))
            if norm_a == 0 or norm_b == 0:
                return 0.0
            return dot_product / (norm_a * norm_b)

        # Calculate similarity scores
        scored_results = []
        for row in rows:
            try:
                embedding = json.loads(row["embedding"].decode("utf-8"))
                similarity = cosine_similarity(query_embedding, embedding)

                record = CodeSummaryRecord(
                    id=row["id"],
                    file_id=row["file_id"],
                    entity_type=row["entity_type"],
                    entity_name=row["entity_name"],
                    signature=row["signature"],
                    summary=row["summary"],
                    line_number=row["line_number"],
                    summary_en=row["summary_en"] or "",
                    summary_zh=row["summary_zh"] or "",
                    embedding=embedding,
                    created_at=row["created_at"]
                )
                scored_results.append((similarity, record))
            except (json.JSONDecodeError, AttributeError, TypeError):
                continue

        # Sort by similarity (descending) and return top results
        scored_results.sort(key=lambda x: x[0], reverse=True)
        return [record for _, record in scored_results[:limit]]

    def list_projects(self) -> list[ProjectRecord]:
        """List all stored projects."""
        cursor = self._get_cursor()
        cursor.execute(
            "SELECT * FROM projects ORDER BY last_scanned DESC"
        )
        rows = cursor.fetchall()
        
        return [
            ProjectRecord(
                id=row["id"],
                path=row["path"],
                name=row["name"],
                last_scanned=row["last_scanned"],
                file_count=row["file_count"]
            )
            for row in rows
        ]

    def delete_project(self, project_id: int) -> bool:
        """Delete the project row with the given ID and commit.

        Only the ``projects`` row is deleted here. Removal of dependent
        rows relies on the schema's ``ON DELETE CASCADE`` clauses.
        NOTE(review): that needs foreign-key enforcement enabled on the
        connection, which is not visible in this method — confirm.

        Args:
            project_id: ID of the project to delete.

        Returns:
            ``True`` if a row was deleted, ``False`` if no project had
            that ID.
        """
        cur = self._get_cursor()
        cur.execute(
            "DELETE FROM projects WHERE id = ?",
            (project_id,)
        )
        # Read the affected-row count before committing, as the cursor
        # reports it for the DELETE just executed.
        removed_rows = cur.rowcount
        self.conn.commit()
        return removed_rows > 0

    def close(self) -> None:
        """Close the database connection, if one is open.

        After closing, ``self.conn`` is reset to ``None``, so calling this
        method again does nothing.
        """
        connection = self.conn
        if not connection:
            return
        connection.close()
        self.conn = None
