from ai_kit.shared_console import shared_console
import aiohttp
import markdownify
import asyncio
from ai_kit.utils.markdown_parser import html_to_markdown
from bs4 import BeautifulSoup
from typing import TypedDict
class BatchFetcherResult(TypedDict):
    """Shape of one record produced by ``BasicFetcher.batch_fetch_and_parse_pages``.

    All four keys are required and hold plain strings.
    """

    # Copied from the input result's "title" entry.
    title: str
    # URL that was fetched; copied from the input result's "href" entry.
    href: str
    # Copied from the input result's "body" entry.
    snippet: str
    # Markdown rendering of the HTML downloaded from ``href``.
    parsed_page_content: str

class BasicFetcher:
    """Fetch web pages concurrently over one shared aiohttp session.

    Intended to be used as an async context manager so that the session is
    closed on exit::

        async with BasicFetcher() as fetcher:
            pages = await fetcher.batch_fetch_and_parse_pages(results)
    """

    # Total time budget, in seconds, for a single page request.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self):
        """Build the default request headers and open the HTTP session."""
        # A browser-like User-Agent is sent with every request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/91.0.4472.124 Safari/537.36"
        }
        # NOTE(review): aiohttp expects a ClientSession to be created while an
        # event loop is running, so instantiate this class from inside a
        # coroutine -- confirm against the aiohttp version in use.
        self.session = aiohttp.ClientSession(headers=self.headers)

    async def __aenter__(self):
        """Return the fetcher itself; the session already exists."""
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Close the underlying HTTP session."""
        await self.session.close()

    async def _fetch_page(self, url: str) -> str:
        """Download ``url`` and return the response body as text.

        This is best-effort: a non-200 status or any exception raised while
        requesting/reading is reported on the shared console and results in
        an empty string instead of an exception.
        """
        # aiohttp documents ClientTimeout as the timeout type; a bare number
        # is only accepted for backward compatibility.
        timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT_SECONDS)
        try:
            async with self.session.get(url, timeout=timeout) as response:
                if response.status != 200:
                    shared_console.print(
                        f"[red]Error fetching page: {response.status}[/red]"
                    )
                    return ""
                return await response.text()
        except Exception as e:
            # Deliberately broad: one bad URL must not abort a whole batch.
            shared_console.print(f"[red]Error fetching page: {e}[/red]")
            return ""

    async def _parse_page_to_markdown(self, html: str) -> str:
        """Convert ``html`` to markdown with markdownify.

        Returns an empty string for empty input. The converter is called with
        ``strip=["footer", "header", "a", "svg"]`` and ``*`` as the bullet
        character.
        """
        if not html:
            return ""
        markdown = markdownify.markdownify(
            html,
            strip=["footer", "header", "a", "svg"],
            bullets="*",
        )
        # NOTE(review): this deletes every pair of consecutive newlines, so
        # text on either side of a blank line is joined with no separator --
        # confirm that this compaction is intended.
        return markdown.replace("\n\n", "").strip()

    async def _parse_page_to_text(self, html: str) -> str:
        """Return the text of ``html`` as extracted by BeautifulSoup.

        Returns an empty string for empty input. Text fragments are stripped
        and joined with newlines (``get_text(separator="\\n", strip=True)``).
        """
        if not html:
            return ""
        soup = BeautifulSoup(html, "html.parser")
        return soup.get_text(separator="\n", strip=True)

    async def batch_fetch_and_parse_pages(
        self, results: list[dict]
    ) -> "list[BatchFetcherResult]":
        """Fetch every result's page concurrently and attach its markdown.

        Args:
            results: Dicts that each provide the keys ``"title"``, ``"href"``
                and ``"body"``.

        Returns:
            One record per input, in input order, carrying ``title``,
            ``href``, ``snippet`` (the input's ``"body"``) and
            ``parsed_page_content``. The latter is an empty string when the
            page could not be fetched.

        Raises:
            KeyError: If an input dict lacks one of the required keys.
        """

        async def _task(result: dict) -> "BatchFetcherResult":
            html = await self._fetch_page(result["href"])
            # A failed fetch yields "", so skip the converter entirely, the
            # same way the other parsers on this class treat empty HTML.
            parsed_page_content = html_to_markdown(html) if html else ""

            return {
                "title": result["title"],
                "href": result["href"],
                "snippet": result["body"],
                "parsed_page_content": parsed_page_content,
            }

        return await asyncio.gather(*(_task(result) for result in results))