"""
Stapply.ai Jobs API Client

This module provides a production-ready client for interacting with the Stapply.ai jobs platform.
The site is a Next.js application using React Server Components (RSC), so we use Playwright
for proper rendering and interaction.

Author: Reverse-engineered from HAR file
Date: 2024
"""

import json
import logging
import time
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional, Any
from urllib.parse import urlsplit

from playwright.sync_api import sync_playwright, Page, Browser, BrowserContext

# Module-wide logging setup: timestamped INFO-level records on the root
# logger, plus the named logger used by everything in this file.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


@dataclass
class Job:
    """One job posting as listed on Stapply.ai.

    The first five fields are required; the remaining ones default to
    ``None`` until something fills them in.
    """
    # Human-readable job title.
    title: str
    # Company identifier.
    company: str
    # Absolute URL of the job page.
    url: str
    # Site-relative URL of the job page.
    relative_url: str
    # Job identifier segment of the URL.
    slug: str
    # Optional enrichment fields.
    external_apply_url: Optional[str] = None
    location: Optional[str] = None
    description: Optional[str] = None

    def __repr__(self) -> str:
        # Deliberately compact: only the identifying fields are shown.
        return "Job(title='{0.title}', company='{0.company}', url='{0.url}')".format(self)


class StapplyAPIClient:
    """
    Playwright-backed client for the Stapply.ai jobs site.

    The site is a Next.js/React Server Components application, so every read
    goes through a real Chromium page instead of plain HTTP requests. A client
    owns one Playwright driver, browser, context and page; they are created by
    ``start()`` (or by entering a ``with`` block) and released by ``stop()``.

    Example:
        >>> with StapplyAPIClient() as client:
        ...     jobs = client.get_jobs()
        ...     details = client.get_job_details(jobs[0].relative_url)
    """

    BASE_URL = "https://map.stapply.ai"
    JOBS_PATH = "/jobs"

    # (attribute name, method that releases it), innermost resource first.
    _TEARDOWN_ORDER = (
        ('page', 'close'),
        ('context', 'close'),
        ('browser', 'close'),
        ('playwright', 'stop'),
    )

    def __init__(
        self,
        headless: bool = True,
        timeout: int = 30000,
        user_agent: Optional[str] = None
    ):
        """
        Store the configuration; no browser is launched until ``start()``.

        Args:
            headless: Whether to run browser in headless mode
            timeout: Default timeout for page operations in milliseconds
            user_agent: Custom user agent string (a desktop Chrome UA is used
                if None or empty)
        """
        self.headless = headless
        self.timeout = timeout
        self.user_agent = user_agent or (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36'
        )

        self.playwright = None
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None

    def start(self) -> None:
        """
        Start the browser session.

        Calling ``start()`` on an already-started client is a no-op, so an
        existing session is never orphaned. If any step of the startup fails,
        everything created so far is released before the error is re-raised.
        """
        if self.playwright is not None:
            logger.warning("Browser session already started; ignoring start()")
            return

        logger.info("Starting Stapply API client...")

        try:
            self.playwright = sync_playwright().start()
            self.browser = self.playwright.chromium.launch(headless=self.headless)

            self.context = self.browser.new_context(
                user_agent=self.user_agent,
                viewport={'width': 1920, 'height': 1080},
                locale='en-US',
            )

            # Set realistic headers
            self.context.set_extra_http_headers({
                'Accept-Language': 'en-US,en;q=0.9',
                'Accept-Encoding': 'gzip, deflate, br',
                'DNT': '1',
            })

            self.page = self.context.new_page()
            self.page.set_default_timeout(self.timeout)
        except Exception:
            # __exit__ is never called when __enter__ fails, so clean up here
            # instead of leaking a half-started driver/browser.
            self.stop()
            raise

        logger.info("Browser session started successfully")

    def stop(self) -> None:
        """
        Stop the browser session and clean up resources.

        Teardown is best-effort: a failure while releasing one resource is
        logged and the remaining ones are still released. All references are
        cleared, so calling ``stop()`` again is harmless and the other methods
        report "not started" afterwards.
        """
        logger.info("Stopping Stapply API client...")

        for attr, release in self._TEARDOWN_ORDER:
            resource = getattr(self, attr)
            # Clear first so a failing close can never leave a stale reference.
            setattr(self, attr, None)
            if resource is None:
                continue
            try:
                getattr(resource, release)()
            except Exception as e:
                logger.warning("Error while releasing %s: %s", attr, e)

        logger.info("Browser session stopped")

    def __enter__(self) -> "StapplyAPIClient":
        """Context manager entry: start the session and return the client."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit: stop the session; exceptions propagate."""
        self.stop()

    def get_jobs(self, wait_time: int = 2000) -> List["Job"]:
        """
        Fetch all jobs from the main jobs page.

        Only site-relative links of the form ``/jobs/<company>/<slug>`` are
        kept; each distinct href yields at most one Job.

        Args:
            wait_time: Extra time to wait after the page load (milliseconds)

        Returns:
            List of Job objects, in page order

        Raises:
            RuntimeError: If browser session not started
            Exception: If page fails to load or parse
        """
        if not self.page:
            raise RuntimeError("Browser not started. Call start() first.")

        logger.info("Fetching jobs from main page...")

        try:
            self.page.goto(f"{self.BASE_URL}{self.JOBS_PATH}", wait_until='networkidle')

            # Give client-side rendering a moment after the network settles.
            self.page.wait_for_timeout(wait_time)

            job_links = self.page.query_selector_all('a[href*="/jobs/"]')
            logger.info("Found %d total links", len(job_links))

            jobs: List["Job"] = []
            seen_urls = set()

            for link in job_links:
                # One malformed link must not abort the whole scrape.
                try:
                    href = link.get_attribute('href')

                    if not href or href in seen_urls:
                        continue

                    # Skip external links and company-only pages
                    if href.startswith('http') or href.count('/') < 3:
                        continue

                    seen_urls.add(href)

                    job = self._job_from_link(link, href)
                    if job is not None:
                        jobs.append(job)

                except Exception as e:
                    logger.warning("Error parsing job link: %s", e)

            logger.info("Successfully extracted %d jobs", len(jobs))
            return jobs

        except Exception as e:
            logger.error("Failed to fetch jobs: %s", e)
            raise

    def _job_from_link(self, link: Any, href: str) -> Optional["Job"]:
        """Build a Job from an anchor element, or None if href is not /jobs/<company>/<slug>."""
        parts = href.strip('/').split('/')
        if len(parts) < 3 or parts[0] != 'jobs':
            return None

        company, slug = parts[1], parts[2]
        fallback_title = slug.replace('-', ' ').title()

        title_elem = link.query_selector('h2, h3, .job-title, [class*="title"]')
        title = title_elem.inner_text().strip() if title_elem else ''

        # Prefer the link text when the heading is missing or is just the slug.
        if not title or title == fallback_title:
            link_text = link.inner_text().strip()
            if link_text and len(link_text) < 200:
                title = link_text

        return Job(
            # Never emit an empty title; fall back to the prettified slug.
            title=title or fallback_title,
            company=company,
            url=f"{self.BASE_URL}{href}",
            relative_url=href,
            slug=slug
        )

    @staticmethod
    def _company_from_url(job_url: str) -> Optional[str]:
        """Return the path segment after 'jobs' for a relative or absolute job URL."""
        segments = [seg for seg in urlsplit(job_url).path.split('/') if seg]
        if 'jobs' in segments:
            following = segments.index('jobs') + 1
            return segments[following] if following < len(segments) else None
        # No 'jobs' segment: keep the historical "second path segment" guess.
        return segments[1] if len(segments) >= 2 else None

    def get_job_details(self, job_url: str, wait_time: int = 2000) -> Dict[str, Any]:
        """
        Fetch detailed information about a specific job.

        Args:
            job_url: Job URL, either relative
                (e.g., '/jobs/vercel/content-engineer-shkfa9') or absolute
            wait_time: Extra time to wait after the page load (milliseconds)

        Returns:
            Dictionary with keys 'url', 'title', 'company', 'location',
            'description', 'external_apply_url' and 'scraped_at' (timezone-aware
            ISO 8601 timestamp). Values that could not be found are None.

        Raises:
            RuntimeError: If browser session not started
            Exception: If page fails to load or parse
        """
        if not self.page:
            raise RuntimeError("Browser not started. Call start() first.")

        logger.info("Fetching job details for: %s", job_url)

        try:
            # Ensure URL is absolute
            full_url = f"{self.BASE_URL}{job_url}" if job_url.startswith('/') else job_url

            self.page.goto(full_url, wait_until='networkidle')
            self.page.wait_for_timeout(wait_time)

            details = {
                'url': full_url,
                'title': None,
                # Parsed from the URL path so absolute URLs work too.
                'company': self._company_from_url(job_url),
                'location': None,
                'description': None,
                'external_apply_url': None,
                'scraped_at': datetime.now().astimezone().isoformat()
            }

            title_elem = self.page.query_selector('h1')
            if title_elem:
                details['title'] = title_elem.inner_text().strip()

            location_elem = self.page.query_selector('[class*="location"], [data-testid="location"]')
            if location_elem:
                details['location'] = location_elem.inner_text().strip()

            desc_elem = self.page.query_selector('article, [class*="description"], [class*="job-content"]')
            if desc_elem:
                details['description'] = desc_elem.inner_text().strip()

            # Get external apply URL (Greenhouse, etc.): first absolute match wins.
            apply_links = self.page.query_selector_all('a[href*="greenhouse"], a[href*="apply"], a[href*="amazon.jobs"]')
            for link in apply_links:
                href = link.get_attribute('href')
                if href and href.startswith('http'):
                    details['external_apply_url'] = href
                    break

            # The 'title' key always exists, so .get() with a default would log None.
            logger.info("Successfully fetched details for: %s", details['title'] or 'Unknown')
            return details

        except Exception as e:
            logger.error("Failed to fetch job details: %s", e)
            raise

    def click_first_job(self, wait_time: int = 2000) -> Dict[str, Any]:
        """
        Navigate to jobs page and click on the first job listing.

        This replicates the user action from the HAR file.

        Args:
            wait_time: Time to wait for page transitions (milliseconds)

        Returns:
            Dictionary containing the clicked job details (same shape as
            ``get_job_details``)

        Raises:
            RuntimeError: If browser session not started, no jobs were found,
                or the first job's link could not be located
            Exception: If navigation or the click fails
        """
        if not self.page:
            raise RuntimeError("Browser not started. Call start() first.")

        logger.info("Executing: Navigate to /jobs and click first job")

        try:
            jobs = self.get_jobs(wait_time=wait_time)

            if not jobs:
                raise RuntimeError("No jobs found on the page")

            first_job = jobs[0]
            logger.info("First job found: %s at %s", first_job.title, first_job.company)

            # Escape backslashes and quotes so the href is a valid CSS string.
            css_href = first_job.relative_url.replace('\\', '\\\\').replace('"', '\\"')
            first_job_link = self.page.query_selector(f'a[href="{css_href}"]')

            if not first_job_link:
                raise RuntimeError(f"Could not find clickable link for {first_job.relative_url}")

            logger.info("Clicking on: %s", first_job.title)
            first_job_link.click()

            # Wait for navigation
            self.page.wait_for_load_state('networkidle')
            self.page.wait_for_timeout(wait_time)

            current_url = self.page.url
            logger.info("Navigated to: %s", current_url)

            # current_url is absolute; get_job_details handles both forms.
            return self.get_job_details(current_url, wait_time=500)

        except Exception as e:
            logger.error("Failed to click first job: %s", e)
            raise

    def save_jobs_to_json(self, jobs: List["Job"], filepath: str) -> None:
        """
        Save jobs list to a UTF-8 JSON file (an array of objects, one per job).

        Args:
            jobs: List of Job objects
            filepath: Path to output JSON file (overwritten if it exists)
        """
        logger.info("Saving %d jobs to %s", len(jobs), filepath)

        jobs_data = [
            {
                'title': job.title,
                'company': job.company,
                'url': job.url,
                'relative_url': job.relative_url,
                'slug': job.slug,
                'external_apply_url': job.external_apply_url,
                'location': job.location,
                # Previously dropped; every Job field is now persisted.
                'description': job.description,
            }
            for job in jobs
        ]

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(jobs_data, f, indent=2, ensure_ascii=False)

        logger.info("Jobs saved successfully to %s", filepath)


def main():
    """
    Demonstrate the client end to end.

    Opens a headless session, clicks through to the first job on /jobs and
    prints its details, then lists all jobs, prints the first five and writes
    the full list to ``jobs_list.json`` next to this file. Any error is
    printed, logged with its traceback and re-raised.
    """
    separator = "=" * 80
    print(separator)
    print("Stapply.ai Jobs API Client - Example Usage")
    print(separator)

    # The with-block stops the browser session even if a step below fails.
    with StapplyAPIClient(headless=True) as client:
        try:
            # Step 1-2: replay the recorded action and show what was found.
            print("\n1. Navigating to /jobs and clicking first job...")
            job_details = client.click_first_job()

            print("\n2. Job Details:")
            print(f"   Title: {job_details['title']}")
            print(f"   Company: {job_details['company']}")
            print(f"   Location: {job_details['location']}")
            print(f"   URL: {job_details['url']}")
            if job_details['external_apply_url']:
                print(f"   Apply URL: {job_details['external_apply_url']}")

            # Step 3-4: full listing plus a short preview.
            print("\n3. Fetching all jobs...")
            jobs = client.get_jobs()
            print(f"   Total jobs found: {len(jobs)}")

            print("\n4. First 5 jobs:")
            for i, job in enumerate(jobs[:5], start=1):
                print(f"   {i}. {job.title} - {job.company}")

            # Step 5: persist the listing alongside this script.
            output_file = Path(__file__).with_name('jobs_list.json')
            client.save_jobs_to_json(jobs, str(output_file))
            print(f"\n5. All jobs saved to: {output_file}")

            print("\n" + separator)
            print("SUCCESS: Task completed successfully!")
            print(separator)

        except Exception as e:
            print(f"\nERROR: {e}")
            logger.exception("Unexpected error in main()")
            raise


# Run the example only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
