"""
FBRef scraper module.
Scrapes match data from FBRef for different competition types.
"""

# Standard library imports
import re
import time
import traceback
from datetime import date, datetime
from typing import Any, Dict, List, Optional, Set, Tuple

# Third-party imports
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

# Local imports
from scrapers.headless_scraper import get_soup
from utils.date import normalize_dates
from utils.format import _format_fbref
from utils.format import normalize_name

# --- Constants & polite crawling settings ---
# Pause, in seconds, that the scraper uses as its politeness delay.
REQUEST_SLEEP_SECONDS = 6

# Browser-like request headers (desktop Firefox user agent).
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0",
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,*/*;q=0.8"
    ),
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "DNT": "1",
}


class FBRefScraper:
    """Unified scraper for matches, players and keepers data (per-link incremental)."""

    # =============== Initialization ===============

    def __init__(
        self,
        last_season: str,
        features_config: Dict[str, any],
        competition: Tuple[str, int, int],
        competition_config: Dict,
        comp_type: str,
        days: List[datetime],
    ):

        self.last_season = last_season
        self.features_config = features_config
        self.competition = competition
        self.comp_type = comp_type
        self.days = days

        self.headers = REQUEST_HEADERS
        self.sleep_seconds = REQUEST_SLEEP_SECONDS

        # Extract country
        self.country = competition_config.get("country", "unknown")

        # Extract and format information scraping URLs
        info_urls = competition_config.get("information_scraping_urls", {})
        self.url_matches = _format_fbref(info_urls.get("matches", ""), self.last_season)
        self.url_ranking = _format_fbref(info_urls.get("ranking", ""), self.last_season)

    # =============== Schedule / Matches ===============

    def get_matches_by_dates(
        self,
    ) -> Tuple[List[str], List[int], List[Tuple[int, int]], List[int]]:
        """Fetch matches ONLY for the dates in self.days."""

        # Validate self.days
        if not self.days:
            print("No dates in self.days; returning empty lists")
            links, gameweeks, scores, result = [], [], [], []
            return links, gameweeks, scores, result

        # Normalize self.days to set of date objects
        requested_dates = normalize_dates(self.days)

        # Fetch and parse the schedule page
        try:
            soup = self._fetch_schedule_page()
        except Exception as e:
            print(f"Failed to fetch schedule page: {e}")
            links, gameweeks, scores, result = [], [], [], []
            return links, gameweeks, scores, result

        all_tables = soup.find_all("table")

        table = soup.find("table", {"id": lambda x: x and "sched" in x})

        if not table:
            if all_tables:
                table = all_tables[0]
            else:
                print("No tables found at all on page!")
                raise RuntimeError("Schedule table not found on FBRef page")

        # Parse matches from table
        collected_matches = self._parse_matches_table(table, requested_dates)

        # Sort chronologically
        collected_matches.sort(key=lambda x: x[0])

        # Unpack to separate lists
        links = [match[1] for match in collected_matches]
        gameweeks = [match[2] for match in collected_matches]
        scores = [match[3] for match in collected_matches]
        result = [match[4] for match in collected_matches]

        return links, gameweeks, scores, result

    def _fetch_schedule_page(self) -> BeautifulSoup:
        """Fetch and parse the FBRef schedule page using HeadlessX."""
        try:
            soup = get_soup(
                self.url_matches,
                human_behavior=True,
                timeout_ms=45000,
                verbose=False
            )
            
            return soup
            
        except Exception as e:
            print(f"Failed to fetch schedule page with HeadlessX: {e}")
            raise

    def _parse_matches_table(
        self, table, requested_dates: set[date]
    ) -> List[Tuple[pd.Timestamp, str, int, Tuple[int, int], int]]:
        """Parse matches from the schedule table."""

        collected_matches = []
        current_gameweek = None

        # Get all rows
        tbody = table.find("tbody")
        rows = tbody.find_all("tr", recursive=False) if tbody else table.find_all("tr")

        # Contadores para debug
        total_rows = 0
        rows_with_date = 0
        rows_matching_date = 0
        rows_with_link = 0
        rows_added = 0

        for i, row in enumerate(rows):
            total_rows += 1

            # Extract gameweek from THIS row (not as a separate header)
            gameweek_cell = row.find("th", {"data-stat": "gameweek"})
            if not gameweek_cell:
                gameweek_cell = row.find("td", {"data-stat": "gameweek"})

            if gameweek_cell:
                current_gameweek = self._extract_gameweek(gameweek_cell)

            # Buscar celda de fecha
            date_cell = row.find("td", {"data-stat": "date"})
            if not date_cell:
                date_cell = row.find("th", {"data-stat": "date"})

            if date_cell:
                rows_with_date += 1
                csk = date_cell.get("csk", "NO_CSK")

                # Parsear la fecha
                if csk and csk != "NO_CSK" and len(csk) == 8 and csk.isdigit():
                    try:
                        parsed_date = pd.to_datetime(
                            csk, format="%Y%m%d", errors="coerce"
                        )
                        if not pd.isna(parsed_date):
                            match_date = parsed_date.date()

                            # Ver si está en requested_dates
                            if match_date in requested_dates:
                                rows_matching_date += 1

                                # Ver si tiene link de match report
                                report_cell = row.find(
                                    "td", {"data-stat": "match_report"}
                                )
                                if report_cell:
                                    link = report_cell.find("a")
                                    if link and link.get("href"):
                                        rows_with_link += 1
                    except Exception as e:
                        print(f"  Row {i}: Error parsing date: {e}")
            else:
                if i < 10:
                    print(f"Row {i}: No date cell found")

            # Parse match row normalmente (pasando el gameweek extraído de ESTA fila)
            match_data = self._parse_match_row(row, requested_dates, current_gameweek)

            if match_data:
                rows_added += 1
                collected_matches.append(match_data)

        return collected_matches

    def _extract_gameweek(self, gameweek_cell) -> Optional[int]:
        """
        Extract gameweek number from cell.

        Args:
            gameweek_cell: BeautifulSoup cell element

        Returns:
            Gameweek number or None
        """
        text = gameweek_cell.get_text(strip=True)

        # Buscar cualquier número en el texto
        match = re.search(r"(\d+)", text)
        if match:
            gw_num = int(match.group(1))
            return gw_num

        return None

    def _parse_match_row(
        self, row, requested_dates: Set[date], current_gameweek: Optional[int]
    ) -> Optional[Tuple[pd.Timestamp, str, int, Tuple[int, int], int]]:
        """
        Parse a single match row.

        Args:
            row: BeautifulSoup row element
            requested_dates: Dates to filter for
            current_gameweek: Current gameweek number

        Returns:
            Tuple of (timestamp, link, gameweek, score, result) or None if row should be skipped
        """

        # Extract date
        match_datetime = self._extract_date(row)

        if not match_datetime:
            return None

        match_date = match_datetime.date()

        # Check if date is in requested range
        if match_date not in requested_dates:
            return None

        # Extract match report link (only played matches have this)
        match_link = self._extract_match_link(row)

        if not match_link:
            print(f"No match link for {match_date} (not played yet?)")
            return None

        # Extract score and result
        score, result = self._extract_score_and_result(row)

        gameweek = int(current_gameweek) if current_gameweek is not None else -1

        return (match_datetime, match_link, gameweek, score, result)

    def _extract_date(self, row) -> Optional[pd.Timestamp]:
        """
        Extract date from row's Date column.

        Args:
            row: BeautifulSoup row element

        Returns:
            Pandas Timestamp or None
        """
        # Buscar en la columna "Date" (data-stat="date")
        date_cell = row.find("td", {"data-stat": "date"})

        if not date_cell:
            date_cell = row.find("th", {"data-stat": "date"})

        if not date_cell:
            return None

        # En FBRef, el atributo 'csk' contiene la fecha en formato YYYYMMDD
        csk = date_cell.get("csk")

        if csk:
            try:
                # Formato: "20170818" -> datetime
                if len(csk) == 8 and csk.isdigit():
                    # Parsear directamente con pandas
                    match_dt = pd.to_datetime(csk, format="%Y%m%d", errors="coerce")

                    if not pd.isna(match_dt):
                        return match_dt
                    else:
                        print(f"Could not parse csk date: {csk}")
            except Exception as e:
                print(f"Error parsing csk date '{csk}': {e}")

        # Fallback: Intentar con texto visible (formato YYYY-MM-DD)
        date_text = date_cell.get_text(strip=True)

        if date_text:
            try:
                match_dt = pd.to_datetime(date_text, errors="coerce")

                if not pd.isna(match_dt):
                    return match_dt
            except Exception as e:
                print(f"Error parsing date text '{date_text}': {e}")

        return None

    def _extract_match_link(self, row) -> Optional[str]:
        """
        Extract match report link from row.

        Args:
            row: BeautifulSoup row element

        Returns:
            Full match URL or None if not available
        """
        report_cell = row.find("td", {"data-stat": "match_report"})

        if not report_cell:
            print(f"No match_report cell found")
            return None

        link_element = report_cell.find("a")

        if not link_element:
            print(f"No <a> tag in match_report cell")
            return None

        href = link_element.get("href")

        if not href:
            print(f"No href attribute in <a> tag")
            return None

        # Verificar que sea un link de match
        if not href.startswith("/en/matches/"):
            print(f"href doesn't start with /en/matches/: {href}")
            return None

        full_url = f"https://fbref.com{href}"

        return full_url

    def _extract_score_and_result(self, row) -> Tuple[Tuple[int, int], int]:
        """
        Extract score and match result from row.

        Args:
            row: BeautifulSoup row element

        Returns:
            Tuple of ((home_goals, away_goals), result)
            where result is 0=home win, 1=draw, 2=away win
        """
        score_cell = row.find("td", {"data-stat": "score"})
        score_text = score_cell.get_text(strip=True) if score_cell else ""

        # Try different separators
        home_goals, away_goals = 0, 0

        for separator in ["–", "-"]:
            if separator in score_text:
                try:
                    parts = score_text.split(separator)
                    home_goals = int(parts[0].strip())
                    away_goals = int(parts[1].strip())
                    break
                except (ValueError, IndexError):
                    continue

        # Determine result: 0=home win, 1=draw, 2=away win
        if home_goals > away_goals:
            result = 0
        elif home_goals < away_goals:
            result = 2
        else:
            result = 1

        return (home_goals, away_goals), result

    def _build_base_matches_df(self) -> pd.DataFrame:
        """
        Crea el dataframe base para los PRÓXIMOS PARTIDOS NUEVOS (todos los que haya),
        dimensionando por len(links) tras consultar el calendario.
        """
        links, gameweeks, scores, result = self.get_matches_by_dates()
        n = len(links)

        if n == 0:
            # No hay nada nuevo; deja df vacío pero con columnas consistentes
            cols = [
                "gameweek",
                "id",
                "date_of_match",
                "hour_of_the_match",
                "day_of_week",
                "day_of_year",
                "hour_of_day",
                "home_team_name",
                "away_team_name",
                "home_trainer",
                "away_trainer",
                "stadium",
                "attendance",
                "referee",
                "var",
                "home_team_formation",
                "away_team_formation",
                "home_possession",
                "away_possession",
                "link",
                "home_goals",
                "away_goals",
                "result",
                "home_result",
                "away_result",
            ]
            df_final = pd.DataFrame(columns=cols)
            return df_final

        # Base vacío del tamaño exacto n
        df_base = pd.DataFrame(
            {
                "competition": pd.Series([self.competition] * n, dtype="string"),
                "id": [None] * n,
                "date_of_match": [None] * n,
                "hour_of_the_match": [None] * n,
                "day_of_week": [None] * n,
                "day_of_year": [None] * n,
                "hour_of_day": [None] * n,
                "home_team_name": pd.Series([None] * n, dtype="str"),
                "away_team_name": pd.Series([None] * n, dtype="str"),
                "home_trainer": pd.Series([None] * n, dtype="str"),
                "away_trainer": pd.Series([None] * n, dtype="str"),
                "stadium": pd.Series([None] * n, dtype="str"),
                "attendance": pd.Series([None] * n, dtype="float"),
                "referee": pd.Series([None] * n, dtype="str"),
                "var": pd.Series([None] * n, dtype="str"),
                "home_team_formation": pd.Series([None] * n, dtype="str"),
                "away_team_formation": pd.Series([None] * n, dtype="str"),
                "home_possession": pd.Series([None] * n, dtype="float"),
                "away_possession": pd.Series([None] * n, dtype="float"),
            }
        )

        df_gameweeks = pd.DataFrame(gameweeks, columns=["gameweek"])
        df_links = pd.DataFrame(links, columns=["link"])
        df_scores = pd.DataFrame(scores, columns=["home_goals", "away_goals"])
        df_result = pd.DataFrame(result, columns=["result"])
        df_result["home_result"] = df_result["result"].map({0: 0, 1: 1, 2: 2})
        df_result["away_result"] = df_result["result"].map({0: 2, 1: 1, 2: 0})

        df_final = pd.concat(
            [df_gameweeks, df_base, df_links, df_scores, df_result], axis=1
        )
        return df_final

    def _enrich_one_match_page_no_csv(
        self, df: pd.DataFrame, idx: int, link: str, last_id: int
    ) -> int:
        """
        Enrich base DF at row `idx` with details parsed from the match page `link`.
        Does NOT save to CSV - only updates df_final in memory.
        """

        # Use get_soup instead of requests.get
        soup = get_soup(link, human_behavior=True, timeout_ms=45000)

        # Extract teams
        teams_elements = soup.find_all("span", class_="teamandlogo")
        if len(teams_elements) >= 2:
            home_team_raw = (teams_elements[0].text or "").strip()
            away_team_raw = (teams_elements[1].text or "").strip()
        else:
            home_team_raw = away_team_raw = ""

        home_team = normalize_name(home_team_raw)
        away_team = normalize_name(away_team_raw)

        df.at[idx, "home_team_name"] = home_team if home_team else "NaN"
        df.at[idx, "away_team_name"] = away_team if away_team else "NaN"

        # Allocate id
        last_id += 1
        df.at[idx, "id"] = last_id

        # Extract date/time
        date_element = soup.find("span", {"class": "venuetime"})
        if date_element:
            match_date = date_element.get("data-venue-date")
            match_time = date_element.get("data-venue-time")
            if match_date:
                match_date = pd.to_datetime(match_date, errors="coerce").strftime(
                    "%Y-%m-%d"
                )
                df.at[idx, "date_of_match"] = match_date
            if match_time:
                parsed_time = pd.to_datetime(
                    match_time, format="%H:%M", errors="coerce"
                )
                df.at[idx, "hour_of_the_match"] = (
                    parsed_time.strftime("%H:%M") if parsed_time is not pd.NaT else None
                )
            if df.at[idx, "date_of_match"] and df.at[idx, "hour_of_the_match"]:
                full_dt = pd.to_datetime(
                    f"{df.at[idx, 'date_of_match']} {df.at[idx, 'hour_of_the_match']}"
                )
                df.at[idx, "day_of_week"] = int(full_dt.dayofweek)
                df.at[idx, "day_of_year"] = int(full_dt.dayofyear)
                df.at[idx, "hour_of_day"] = float(full_dt.hour + full_dt.minute / 60.0)

        # Extract trainers
        trainers = []
        for dp in soup.find_all("div", class_="datapoint"):
            if "Manager" in dp.text:
                txt = dp.text.replace("Manager", "").replace(":", "").strip()
                if txt:
                    trainers.append(normalize_name(txt))

        df.at[idx, "home_trainer"] = trainers[0] if len(trainers) >= 1 else pd.NA
        df.at[idx, "away_trainer"] = trainers[1] if len(trainers) >= 2 else pd.NA

        scorebox_meta = soup.find("div", class_="scorebox_meta")
        smalls = scorebox_meta.find_all("small") if scorebox_meta else []

        attendance = 0
        for sm in smalls:
            txt = sm.text.strip().replace(",", "").replace(".", "")
            if txt.isdigit():
                attendance = int(txt)
                break
        df.at[idx, "attendance"] = attendance

        stadium = "NaN"
        cand = None
        for sm in smalls:
            if "Officials" in sm.text:
                break
            cand = sm.text
        if cand:
            stadium_raw = cand.split(",")[0].strip()
            stadium = normalize_name(stadium_raw)
        df.at[idx, "stadium"] = stadium if stadium else "NaN"

        # Extract referee/VAR
        referee_txt = None
        var_txt = None

        # Buscar el <small> que tiene exactamente "Officials" como texto
        officials_small = None
        for sm in smalls:
            if sm.get_text(strip=True) == "Officials":
                officials_small = sm
                break

        if officials_small:
            officials_strong = officials_small.find_parent("strong")
            
            if officials_strong:
                next_small = officials_strong.find_next_sibling("small")
                
                if next_small:
                    # Primero intentar con spans
                    all_spans = next_small.find_all("span", style="display:inline-block")
                    
                    if all_spans:
                        # Caso 1: Hay spans con style
                        for span in all_spans:
                            text = span.get_text()
                            if "(Referee)" in text:
                                referee_txt = text.replace("(Referee)", "").replace("\xa0", " ").strip()
                            elif "(VAR)" in text:
                                var_txt = text.replace("(VAR)", "").replace("\xa0", " ").strip()
                    else:
                        # Caso 2: No hay spans, parsear texto directo
                        full_text = next_small.get_text()
                        # Separar por middle dot (·)
                        officials_list = [x.strip() for x in full_text.split("·")]
                        
                        for official in officials_list:
                            if "(Referee)" in official:
                                referee_txt = official.replace("(Referee)", "").replace("\xa0", " ").strip()
                            elif "(VAR)" in official:
                                var_txt = official.replace("(VAR)", "").replace("\xa0", " ").strip()

        referee_norm = normalize_name(referee_txt) if referee_txt else pd.NA
        var_norm = normalize_name(var_txt) if var_txt else pd.NA

        df.at[idx, "referee"] = referee_norm
        df.at[idx, "var"] = var_norm

        # Extract possession
        team_stats = soup.find("div", id="team_stats")
        if team_stats:
            strongs = team_stats.find_all("strong")
            if len(strongs) >= 2:
                hp = (strongs[0].text or "").strip().strip("%")
                ap = (strongs[1].text or "").strip().strip("%")
                df.at[idx, "home_possession"] = float(hp) if hp else None
                df.at[idx, "away_possession"] = float(ap) if ap else None

        # Extract formations
        home_formation = None
        away_formation = None

        # Find lineup divs by id
        home_lineup_div = soup.find("div", {"class": "lineup", "id": "a"})
        away_lineup_div = soup.find("div", {"class": "lineup", "id": "b"})

        # Extract home formation
        if home_lineup_div:
            home_table = home_lineup_div.find("table")
            if home_table:
                home_th = home_table.find("th", {"colspan": "2"})
                if home_th:
                    text = home_th.get_text(strip=True)
                    # Extract formation from text like "Málaga (3-4-3)"
                    if "(" in text and ")" in text:
                        home_formation = text.split("(")[1].split(")")[0].strip()

        # Extract away formation
        if away_lineup_div:
            away_table = away_lineup_div.find("table")
            if away_table:
                away_th = away_table.find("th", {"colspan": "2"})
                if away_th:
                    text = away_th.get_text(strip=True)
                    # Extract formation from text like "Valencia (4-4-2)"
                    if "(" in text and ")" in text:
                        away_formation = text.split("(")[1].split(")")[0].strip()

        df.at[idx, "home_team_formation"] = home_formation if home_formation else pd.NA
        df.at[idx, "away_team_formation"] = away_formation if away_formation else pd.NA

        # Initialize event counters by 15-minute intervals
        intervals = ["0_15", "15_30", "30_45", "45_60", "60_75", "75_90"]

        for team in ["home", "away"]:
            for interval in intervals:
                df.at[idx, f"{team}_goals_{interval}"] = 0
                df.at[idx, f"{team}_yellow_cards_{interval}"] = 0
                df.at[idx, f"{team}_red_cards_{interval}"] = 0

        # Additional time goals
        df.at[idx, "home_goals_45plus"] = 0
        df.at[idx, "away_goals_45plus"] = 0
        df.at[idx, "home_goals_90plus"] = 0
        df.at[idx, "away_goals_90plus"] = 0

        # Extract events
        events_wrap = soup.find("div", id="events_wrap")
        if events_wrap:
            # Find all event divs that have class "event" and either "a" or "b"
            events = events_wrap.find_all("div", class_="event")

            for event in events:
                # Determine if it's home (a) or away (b)
                event_classes = event.get("class", [])
                is_home = "a" in event_classes
                is_away = "b" in event_classes
                team_prefix = "home" if is_home else "away" if is_away else None

                if not team_prefix:
                    continue

                # Extract minute from the first child div
                # The structure is: <div>&nbsp;57&rsquor;<br/><small>...</small></div>
                minute_div = event.find("div", recursive=False)
                if not minute_div:
                    continue

                # Get text and parse - BeautifulSoup converts HTML entities automatically
                minute_text = minute_div.get_text(strip=True)

                # Remove the score part (everything after newline if exists)
                if "\n" in minute_text:
                    minute_text = minute_text.split("\n")[0].strip()

                # Parse minute
                minute = None
                is_45plus = False
                is_90plus = False

                if minute_text:
                    # Clean: remove apostrophes and other characters
                    minute_clean = (
                        minute_text.replace("'", "")
                        .replace("'", "")
                        .replace("′", "")
                        .strip()
                    )

                    if "+" in minute_clean:
                        parts = minute_clean.split("+")
                        try:
                            base = int(parts[0])
                            if base == 45:
                                is_45plus = True
                                minute = 45
                            elif base >= 90:
                                is_90plus = True
                                minute = 90
                            else:
                                minute = base
                        except ValueError:
                            continue
                    else:
                        # Extract just the number
                        import re

                        match = re.search(r"(\d+)", minute_clean)
                        if match:
                            try:
                                minute = int(match.group(1))
                            except ValueError:
                                continue
                        else:
                            continue

                if minute is None:
                    continue

                # Find the event icon - it's a sibling div with class event_icon
                event_divs = event.find_all("div", recursive=False)
                if len(event_divs) < 2:
                    continue

                # The second div contains the event icon
                event_content_div = event_divs[1]
                event_icon = event_content_div.find("div", class_="event_icon")

                if not event_icon:
                    continue

                icon_classes = event_icon.get("class", [])
                class_string = " ".join(icon_classes).lower()

                # Check for goals
                if "goal" in class_string:
                    if is_45plus:
                        df.at[idx, f"{team_prefix}_goals_45plus"] += 1
                    elif is_90plus:
                        df.at[idx, f"{team_prefix}_goals_90plus"] += 1
                    else:
                        if minute <= 15:
                            df.at[idx, f"{team_prefix}_goals_0_15"] += 1
                        elif minute <= 30:
                            df.at[idx, f"{team_prefix}_goals_15_30"] += 1
                        elif minute <= 45:
                            df.at[idx, f"{team_prefix}_goals_30_45"] += 1
                        elif minute <= 60:
                            df.at[idx, f"{team_prefix}_goals_45_60"] += 1
                        elif minute <= 75:
                            df.at[idx, f"{team_prefix}_goals_60_75"] += 1
                        else:
                            df.at[idx, f"{team_prefix}_goals_75_90"] += 1

                # Check for yellow cards
                elif "yellow_card" in class_string or "yellow-card" in class_string:
                    effective_minute = (
                        minute
                        if not (is_45plus or is_90plus)
                        else (45 if is_45plus else 90)
                    )
                    if effective_minute <= 15:
                        df.at[idx, f"{team_prefix}_yellow_cards_0_15"] += 1
                    elif effective_minute <= 30:
                        df.at[idx, f"{team_prefix}_yellow_cards_15_30"] += 1
                    elif effective_minute <= 45:
                        df.at[idx, f"{team_prefix}_yellow_cards_30_45"] += 1
                    elif effective_minute <= 60:
                        df.at[idx, f"{team_prefix}_yellow_cards_45_60"] += 1
                    elif effective_minute <= 75:
                        df.at[idx, f"{team_prefix}_yellow_cards_60_75"] += 1
                    else:
                        df.at[idx, f"{team_prefix}_yellow_cards_75_90"] += 1

                # Check for red cards
                elif "red_card" in class_string or "red-card" in class_string:
                    effective_minute = (
                        minute
                        if not (is_45plus or is_90plus)
                        else (45 if is_45plus else 90)
                    )
                    if effective_minute <= 15:
                        df.at[idx, f"{team_prefix}_red_cards_0_15"] += 1
                    elif effective_minute <= 30:
                        df.at[idx, f"{team_prefix}_red_cards_15_30"] += 1
                    elif effective_minute <= 45:
                        df.at[idx, f"{team_prefix}_red_cards_30_45"] += 1
                    elif effective_minute <= 60:
                        df.at[idx, f"{team_prefix}_red_cards_45_60"] += 1
                    elif effective_minute <= 75:
                        df.at[idx, f"{team_prefix}_red_cards_60_75"] += 1
                    else:
                        df.at[idx, f"{team_prefix}_red_cards_75_90"] += 1

        return df

    # =============== Players/Keeper extraction ===============

    def _fetch_match_page(self, link: str) -> BeautifulSoup:
        """
        Fetch web page content for the given link and return BeautifulSoup.
        Uses HeadlessX to bypass anti-bot protections.
        """
        # Use get_soup instead of requests.get
        soup = get_soup(link, human_behavior=True, timeout_ms=45000)
        time.sleep(self.sleep_seconds)  # politeness delay
        return soup

    def _extract_match_information(
        self, link: str
    ) -> Tuple[Dict[str, Dict[str, str]], "BeautifulSoup"]:
        """
        Fetch a match page and identify both teams from their logo images.

        The first two ``<img class="teamlogo">`` tags that carry a ``src``
        attribute are treated as the home and away team, in that order.
        For each one:

        * the team id is the file name of the logo URL without its extension;
        * the team name is the ``alt`` text with the " Club Crest" marker
          removed and spaces replaced by underscores.

        Args:
            link: URL of the match page.

        Returns:
            A ``(teams_data, soup)`` tuple. ``teams_data`` maps ``"home"`` and
            ``"away"`` to ``{"id": <team id>, "name": <team name>}``; ``soup``
            is the parsed page that was fetched for ``link``.

        Raises:
            Exception: If fewer than two team logos are found on the page.
        """
        soup = self._fetch_match_page(link)

        team_imgs = soup.find_all("img", class_="teamlogo", src=True)
        if len(team_imgs) < 2:
            raise Exception("Not enough team logos found. Expected at least two.")

        def _parse_team(img_tag):
            """Turn one logo tag into its ``{"id", "name"}`` record."""
            src = img_tag.get("src", "")
            alt = img_tag.get("alt", "")
            # Last path segment of the logo URL, minus a trailing ".ext".
            file_name = src.split("/")[-1]
            team_id = re.sub(r"\.\w+$", "", file_name).strip()
            team_name = alt.replace(" Club Crest", "").strip().replace(" ", "_")
            return {"id": team_id, "name": team_name}

        home_img, away_img = team_imgs[0], team_imgs[1]
        teams_data = {
            "home": _parse_team(home_img),
            "away": _parse_team(away_img),
        }
        return teams_data, soup

    def extract_players_table(
        self, team_type, table_type, header_offset, columns_to_drop, teams_data, soup
    ):
        """
        Extract one player statistics table for a team from an already-parsed page.

        The table is looked up by element id, trying ``#div_stats_<id>_<type>``
        first and ``#stats_<id>_<type>`` second. Parsing happens entirely on
        the ``soup`` passed in; no request is issued here, so no politeness
        delay is applied (the page fetch already waits once per request).

        Args:
            team_type: Key into ``teams_data`` ("home" or "away").
            table_type: Table suffix used in the element id (e.g. "summary").
            header_offset: Number of leading ``<th>`` cells of the ``<thead>``
                to skip before the real column names start (presumably the
                grouping header cells; verify against the page layout).
            columns_to_drop: Column names removed from the result.
            teams_data: Mapping whose ``[team_type]["id"]`` is the team id.
            soup: Parsed match page.

        Returns:
            DataFrame holding the stripped cell texts of every ``<tr>`` in the
            table body, without the columns listed in ``columns_to_drop``.

        Raises:
            Exception: If the table or its ``<thead>`` cannot be found.
            ValueError: Raised by pandas when a row does not have as many
                cells as there are headers.
        """
        team_id = teams_data[team_type]["id"]

        # Try both possible table selectors
        table = soup.select_one(
            f"#div_stats_{team_id}_{table_type}"
        ) or soup.select_one(f"#stats_{team_id}_{table_type}")

        if not table:
            raise Exception(
                f"Players table '{table_type}' not found for team '{team_type}'."
            )

        # Fail with a clear message instead of an AttributeError on None.
        thead = table.find("thead")
        if thead is None:
            raise Exception(
                f"Players table '{table_type}' for team '{team_type}' has no header."
            )
        headers = [th.get_text(strip=True) for th in thead.find_all("th")][
            header_offset:
        ]

        # Fall back to the whole element when there is no explicit <tbody>.
        tbody = table.find("tbody") or table
        rows = [
            [cell.get_text(strip=True) for cell in row.find_all(["td", "th"])]
            for row in tbody.find_all("tr")
        ]

        df = pd.DataFrame(rows, columns=headers)
        return df.loc[:, ~df.columns.isin(columns_to_drop)]

    def extract_keeper_table(self, team_type, header_offset, teams_data, soup):
        """
        Extract the goalkeeper statistics table for a team from a parsed page.

        The table is looked up by element id, trying ``#keeper_stats_<id>``
        first and ``#div_keeper_stats_<id>`` second. Parsing happens entirely
        on the ``soup`` passed in; no request is issued here, so no politeness
        delay is applied (the page fetch already waits once per request).

        Args:
            team_type: Key into ``teams_data`` ("home" or "away").
            header_offset: Number of leading ``<th>`` cells of the ``<thead>``
                to skip before the real column names start (presumably the
                grouping header cells; verify against the page layout).
            teams_data: Mapping whose ``[team_type]["id"]`` is the team id.
            soup: Parsed match page.

        Returns:
            DataFrame holding the stripped cell texts of every ``<tr>`` in the
            table body, one column per remaining header.

        Raises:
            Exception: If the table or its ``<thead>`` cannot be found.
            ValueError: Raised by pandas when a row does not have as many
                cells as there are headers.
        """
        team_id = teams_data[team_type]["id"]

        # Try both possible table selectors
        table = soup.select_one(f"#keeper_stats_{team_id}") or soup.select_one(
            f"#div_keeper_stats_{team_id}"
        )

        if not table:
            raise Exception(f"Keeper table not found for team '{team_type}'.")

        # Fail with a clear message instead of an AttributeError on None.
        thead = table.find("thead")
        if thead is None:
            raise Exception(f"Keeper table for team '{team_type}' has no header.")
        headers = [th.get_text(strip=True) for th in thead.find_all("th")][
            header_offset:
        ]

        # Fall back to the whole element when there is no explicit <tbody>.
        tbody = table.find("tbody") or table
        rows = [
            [cell.get_text(strip=True) for cell in row.find_all(["td", "th"])]
            for row in tbody.find_all("tr")
        ]

        return pd.DataFrame(rows, columns=headers)

    def process_players_data(self, team_type, teams_data, soup):
        """
        Collect every player statistics table of one team (home or away).

        Each table is read through ``extract_players_table`` with its own
        header offset and its own list of columns to discard.

        Returns:
            Dict keyed by table type, in extraction order ("summary",
            "passing", "passing_types", "defense", "possession", "misc"),
            whose values are the extracted DataFrames.
        """
        # Identity columns discarded from every table except "summary".
        identity = ["Player", "#", "Nation", "Pos", "Age", "Min"]

        # (table type, header offset, columns to drop), in extraction order.
        # Original values - do not change.
        table_specs = (
            (
                "summary",
                7,
                [
                    "Gls",
                    "Ast",
                    "PK",
                    "PKatt",
                    "CrdY",
                    "CrdR",
                    "Touches",
                    "Tkl",
                    "Int",
                    "Blocks",
                    "xG",
                    "npxG",
                    "xAG",
                    "SCA",
                    "GCA",
                    "Cmp",
                    "Att",
                    "Cmp%",
                    "PrgP",
                    "Carries",
                    "PrgC",
                    "Succ",
                ],
            ),
            ("passing", 9, list(identity)),
            ("passing_types", 4, identity + ["Att", "Cmp"]),
            ("defense", 5, list(identity)),
            ("possession", 5, identity + ["Tkld", "Tkld%"]),
            ("misc", 3, identity + ["Off", "Crs", "Int", "TklW", "OG"]),
        )

        return {
            name: self.extract_players_table(
                team_type, name, offset, dropped, teams_data, soup
            )
            for name, offset, dropped in table_specs
        }

    def avg_minutes_per_player(self, final_table: pd.DataFrame, team_type: str):
        """
        Return the mean minutes played per contributing player in one match.

        A row contributes when its player name is not blank and its minutes
        value parses to a number greater than zero. The result is the sum of
        the contributing minutes divided by the number of distinct
        contributing player names.

        Args:
            final_table: Frame holding ``<team_type>_Players`` and
                ``<team_type>_PlayersMinutes``.
            team_type: Column prefix ("home" or "away").

        Returns:
            The average as a float, or ``pd.NA`` when no row contributes.

        Raises:
            KeyError: If either required column is missing.
        """
        name_col = f"{team_type}_Players"
        mins_col = f"{team_type}_PlayersMinutes"

        for required in (name_col, mins_col):
            if required not in final_table.columns:
                raise KeyError(f"Missing required columns: {name_col}, {mins_col}")

        # Non-numeric minutes become NaN and are excluded below.
        numeric_minutes = pd.to_numeric(final_table[mins_col], errors="coerce")

        has_name = final_table[name_col].astype(str).str.strip() != ""
        has_played = numeric_minutes.notna() & (numeric_minutes > 0)
        counted = has_name & has_played

        distinct_players = final_table.loc[counted, name_col].nunique()
        # min_count=1 makes an empty selection sum to NaN instead of 0.
        minutes_sum = numeric_minutes[counted].sum(min_count=1)

        if distinct_players == 0 or pd.isna(minutes_sum):
            return pd.NA

        return float(minutes_sum) / float(distinct_players)

    def get_players_data(
        self, teams_data, soup
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Build team-level and player-level statistics for both teams of a match.

        For each side ("home", then "away") the tables returned by
        ``process_players_data`` are concatenated column-wise and renamed to a
        fixed 90-column schema. Rows whose age does not start with two digits
        are discarded. Nothing is written to disk.

        Args:
            teams_data: Mapping with "home" and "away" entries; each entry's
                "name" is copied into the player-level output.
            soup: Parsed match page, passed through to ``process_players_data``.

        Returns:
            A ``(combined_df, combined_df_players)`` tuple.

            * ``combined_df`` has a single row: an ``id`` column (left as NaN
              here) followed by the home and then the away aggregates. For
              each side, ``<side>_Players`` holds the number of player rows,
              age and percentage columns hold means, the remaining statistics
              hold sums, and ``<side>_PlayersAvgMinutes`` holds the rounded
              result of ``avg_minutes_per_player``.
            * ``combined_df_players`` has one row per player (home first),
              unprefixed column names, a normalized ``Players`` name and the
              extra columns ``team``, ``competition_type`` and ``team_name``.
              Its statistic values are the raw cell texts, except the age.

        Raises:
            ValueError: Raised by pandas when the concatenated tables do not
                have exactly as many columns as the expected schema.
        """
        # Leading identity columns of the concatenated tables, in order.
        identity_suffixes = [
            "Players",
            "Number",
            "Nationality",
            "Position",
            "PlayersAge",
            "PlayersMinutes",
        ]

        # Statistic columns that follow the identity columns, in table order.
        stat_suffixes = [
            "PlayersShots",
            "PlayersShotsOnTarget",
            "PlayersCompletedPasses",
            "PlayersAttemptedPasses",
            "Players%CompletedPasses",
            "PlayersDistancePasses",
            "PlayersDistanceProgression",
            "PlayersShortPasses",
            "PlayersAttemptedShortPasses",
            "Players%ShortCompletedPasses",
            "PlayersMediumPasses",
            "PlayersAttemptedMediumPasses",
            "Players%MediumCompletedPasses",
            "PlayersLongPasses",
            "PlayersAttemptedLongPasses",
            "Players%LongCompletedPasses",
            "PlayersAssistance",
            "PlayersExpectedGoalsAssistance",
            "PlayersExpectedAssistance",
            "PlayersKeyPasses",
            "PlayersLast1/3Passes",
            "PlayersGoalAreaPasses",
            "PlayersGoalAreaCrosses",
            "PlayersGoalPasses",
            "PlayersLiveBallPasses",
            "PlayersDeadBallPasses",
            "PlayersFreeKick",
            "PlayersThroughPasses",
            "PlayersSidePasses",
            "PlayersCrosses",
            "PlayersStrongcrosses",
            "PlayersCorner",
            "PlayersCornerIn",
            "PlayersCornerOut",
            "PlayersCornerRect",
            "PlayersOffsidePasses",
            "PlayersPassesLost",
            "PlayersTackles",
            "PlayersSuccessfulTackles",
            "PlayersTacklesInDefense",
            "PlayersTacklesInMedium",
            "PlayersTacklesInAttack",
            "PlayersDribblerTackles",
            "PlayersAttemptedDribblerTackles",
            "Players%DribblerTacklesCompleted",
            "PlayersDribblerTacklesNonCompleted",
            "PlayersBallsBlocked",
            "PlayersShotsBlocked",
            "PlayersPassesBlocked",
            "PlayersInterceptions",
            "PlayersTackles+Interceptions",
            "PlayersClearances",
            "PlayersMistakesRivalShots",
            "PlayersTouches",
            "PlayersOwnPenaltyAreaTouches",
            "PlayersTouchesInDefense",
            "PlayersTouchesInMedium",
            "PlayersTouchesInAttack",
            "PlayersAwayPenaltyAreaTouches",
            "PlayersLiveBallTouches",
            "PlayersAttemptedDribbles",
            "PlayersDribblesCompleted",
            "Players%DribblesCompleted",
            "PlayersBallCarries",
            "PlayersDistanceCarried",
            "PlayersForwardDistanceCarried",
            "PlayersForwardCarries",
            "PlayersCarriesInAttack",
            "PlayersAwayPenaltyAreaCarries",
            "PlayersLostControlCarries",
            "PlayersLostCarries",
            "PlayersPassesReception",
            "PlayersAttackPassesReception",
            "PlayersYellowCards",
            "PlayersRedCards",
            "PlayersSecondYellowCards",
            "PlayersFouls",
            "PlayersFoulsReceived",
            "PlayersPenalties",
            "PlayersPenaltiesConceded",
            "PlayersLostBallRecoveries",
            "PlayersAerialsWon",
            "PlayersAerialsLost",
            "Players%AerialsWon",
        ]

        # Columns kept in the per-player output, in output order.
        keep_suffixes = [
            "Players",
            "Nationality",
            "Position",
            "PlayersAge",
            "PlayersMinutes",
            # Finishing & creation
            "PlayersShots",
            "PlayersShotsOnTarget",
            "PlayersGoalPasses",
            "PlayersExpectedGoalsAssistance",
            "PlayersExpectedAssistance",
            "PlayersKeyPasses",
            "PlayersTouches",
            "PlayersAwayPenaltyAreaTouches",
            # Passing (quality + progression)
            "PlayersCompletedPasses",
            "PlayersAttemptedPasses",
            "PlayersDistanceProgression",
            "PlayersThroughPasses",
            "PlayersCrosses",
            "PlayersLiveBallPasses",
            "PlayersDeadBallPasses",
            # Carrying / dribbling
            "PlayersBallCarries",
            "PlayersForwardCarries",
            "PlayersDistanceCarried",
            "PlayersForwardDistanceCarried",
            "PlayersDribblesCompleted",
            "PlayersAttemptedDribbles",
            # Defense / disruption
            "PlayersTackles",
            "PlayersSuccessfulTackles",
            "PlayersInterceptions",
            "PlayersTackles+Interceptions",
            "PlayersBallsBlocked",
            "PlayersShotsBlocked",
            "PlayersClearances",
            # Duels / discipline
            "PlayersAerialsWon",
            "PlayersAerialsLost",
            "PlayersFouls",
            "PlayersFoulsReceived",
            "PlayersYellowCards",
            "PlayersRedCards",
            # Zones / heat
            "PlayersTouchesInDefense",
            "PlayersTouchesInMedium",
            "PlayersTouchesInAttack",
            "PlayersOwnPenaltyAreaTouches",
            # Others
            "PlayersCorner",
            "PlayersFreeKick",
            "PlayersSidePasses",
            "PlayersGoalAreaPasses",
            "PlayersCarriesInAttack",
            "PlayersAwayPenaltyAreaCarries",
        ]

        # Age and percentage columns are averaged; minutes and every other
        # statistic are summed.
        mean_suffixes = ["PlayersAge"] + [s for s in stat_suffixes if "%" in s]
        sum_suffixes = ["PlayersMinutes"] + [
            s for s in stat_suffixes if "%" not in s
        ]

        team_results = {}
        final_df_players = {}

        for team_type in ["home", "away"]:
            prefix = f"{team_type}_"
            age_col = f"{prefix}PlayersAge"
            avg_min_col = f"{prefix}PlayersAvgMinutes"

            # Extract all tables of the team and join them side by side.
            team_tables = self.process_players_data(team_type, teams_data, soup)
            final_table = pd.concat(list(team_tables.values()), axis=1)
            final_table.columns = [
                prefix + suffix for suffix in identity_suffixes + stat_suffixes
            ]

            # Keep the leading two digits of the age text; anything else -> None.
            final_table[age_col] = final_table[age_col].apply(
                lambda x: int(x[:2]) if isinstance(x, str) and x[:2].isdigit() else None
            )

            # Drop rows without an age. The index is rebuilt so that the
            # totals row appended below always lands on a new label after the
            # last player instead of overwriting an existing row.
            final_table = final_table.dropna(subset=[age_col]).reset_index(drop=True)

            # ---- Per-player output (built before numeric coercion) ----
            keep_cols = [prefix + suffix for suffix in keep_suffixes]
            available_cols = [col for col in keep_cols if col in final_table.columns]
            filtered_df = final_table[available_cols].copy()
            filtered_df.columns = filtered_df.columns.str.replace(f"{team_type}_", "")

            filtered_df["Players"] = (
                filtered_df["Players"]
                .astype(str)
                .str.strip()
                .str.lower()
                .str.normalize("NFKD")
                .str.encode("ascii", "ignore")
                .str.decode("utf-8")  # strips accents
                .str.replace(r"[^a-z0-9]+", "_", regex=True)  # any separator -> "_"
                .str.replace(r"_+", "_", regex=True)  # collapse repeated "_"
                .str.strip("_")  # no leading/trailing "_"
            )

            # ---- Team-level aggregates ----
            mean_cols = [prefix + suffix for suffix in mean_suffixes]
            sum_cols = [prefix + suffix for suffix in sum_suffixes]

            # Unparseable cells become NaN and are ignored by mean/sum.
            for col in mean_cols + sum_cols:
                final_table[col] = pd.to_numeric(final_table[col], errors="coerce")

            mean_values = final_table[mean_cols].mean()
            sum_values = final_table[sum_cols].sum()
            avg_min = self.avg_minutes_per_player(final_table, team_type)
            num_lines = len(final_table)

            # The average-minutes column must exist before the totals row is
            # built so that the row carries a value for it.
            final_table[avg_min_col] = np.nan

            # Totals row: "-" placeholder everywhere, then the aggregates.
            total_row = {col: "-" for col in final_table.columns}
            total_row.update(mean_values.items())
            total_row.update(sum_values.items())
            total_row[avg_min_col] = (
                None if pd.isna(avg_min) else float(round(avg_min, 1))
            )
            # The first column of the totals row carries the player count.
            total_row[final_table.columns[0]] = num_lines

            # "id" is added after the totals row is built, so it stays NaN.
            final_table["id"] = np.nan

            # Append the totals row as a new last row.
            final_table.loc[len(final_table)] = total_row

            desired_cols = [
                "id",
                f"{prefix}Players",
                age_col,
                f"{prefix}PlayersMinutes",
                avg_min_col,
            ] + [prefix + suffix for suffix in stat_suffixes]
            team_results[team_type] = final_table.loc[
                [final_table.index[-1]], desired_cols
            ].reset_index(drop=True)

            filtered_df["team"] = team_type
            filtered_df["competition_type"] = self.comp_type
            filtered_df["team_name"] = teams_data[team_type]["name"]
            final_df_players[team_type] = filtered_df

        # Home and away aggregates side by side in a single row.
        combined_df = pd.concat([team_results["home"], team_results["away"]], axis=1)

        # Players of both teams stacked vertically with a fresh index.
        combined_df_players = pd.concat(
            [final_df_players["home"], final_df_players["away"]],
            axis=0,
            ignore_index=True,
        )

        # Both halves bring an "id" column; keep only the first one.
        id_positions = [
            pos for pos, col in enumerate(combined_df.columns) if col == "id"
        ]
        if len(id_positions) > 1:
            duplicates = set(id_positions[1:])
            combined_df = combined_df.iloc[
                :,
                [pos for pos in range(combined_df.shape[1]) if pos not in duplicates],
            ]

        return combined_df, combined_df_players

    def get_keepers_data(self, match_id: int, teams_data, soup) -> dict:
        """
        Extract keeper statistics for both teams and return as dict.
        Does NOT save to CSV - returns aggregated data in memory.
        """
        result = {"id": match_id}

        final_df_keepers = {}

        for team_type in ["home", "away"]:
            try:
                # Extract keeper table
                final_table = self.extract_keeper_table(
                    team_type, header_offset=7, teams_data=teams_data, soup=soup
                )
                final_table = final_table.reset_index(drop=True)

                # Define column names (24 columns expected)
                new_columns = [
                    f"{team_type}_Keepers",
                    f"{team_type}_KeepersNationality",
                    f"{team_type}_KeepersAge",
                    f"{team_type}_KeepersMinutes",
                    f"{team_type}_KeepersShotsOnTargetAgainst",
                    f"{team_type}_KeepersGoalsAgainst",
                    f"{team_type}_KeepersSaved",
                    f"{team_type}_Keepers%Saved",
                    f"{team_type}_KeepersxG",
                    f"{team_type}_KeepersPassesLaunched",
                    f"{team_type}_KeepersAttemptedPassesLaunched",
                    f"{team_type}_Keepers%CompletedPassesLaunched",
                    f"{team_type}_KeepersPasses",
                    f"{team_type}_KeepersAttemptedPasses",
                    f"{team_type}_Keepers%CompletedPasses",
                    f"{team_type}_KeepersPassesDistance",
                    f"{team_type}_KeepersAttemptedKicks",
                    f"{team_type}_Keepers%Kicks",
                    f"{team_type}_KeepersKicksDistance",
                    f"{team_type}_KeepersCrosses",
                    f"{team_type}_KeepersCrossesStopped",
                    f"{team_type}_Keepers%CrossesStopped",
                    f"{team_type}_KeepersActionsOutsideArea",
                    f"{team_type}_KeepersDistanceActionsArea",
                ]

                # Only rename if column count matches
                if len(final_table.columns) == len(new_columns):
                    final_table.columns = new_columns
                else:
                    final_table.columns = [
                        f"{team_type}_Keeper_{i}"
                        for i in range(len(final_table.columns))
                    ]

                # Clean age column
                age_col = f"{team_type}_KeepersAge"
                if age_col in final_table.columns:
                    final_table[age_col] = final_table[age_col].apply(
                        lambda x: (
                            int(x[:2]) if isinstance(x, str) and x[:2].isdigit() else 0
                        )
                    )

                if len(final_table) == 0:
                    print(f"No keeper data for {team_type} team")
                    continue

                # Define las columnas que quieres mantener
                columns_to_keep_keepers = [
                    f"{team_type}_Keepers",
                    f"{team_type}_KeepersNationality",
                    f"{team_type}_KeepersAge",
                    f"{team_type}_KeepersMinutes",
                    f"{team_type}_KeepersShotsOnTargetAgainst",
                    f"{team_type}_KeepersGoalsAgainst",
                    f"{team_type}_KeepersSaved",
                    f"{team_type}_KeepersxG",
                    f"{team_type}_KeepersPasses",
                    f"{team_type}_KeepersAttemptedPasses",
                    f"{team_type}_KeepersPassesDistance",
                    f"{team_type}_KeepersPassesLaunched",
                    f"{team_type}_KeepersAttemptedPassesLaunched",
                    f"{team_type}_KeepersAttemptedKicks",
                    f"{team_type}_KeepersKicksDistance",
                    f"{team_type}_KeepersCrosses",
                    f"{team_type}_KeepersCrossesStopped",
                    f"{team_type}_KeepersActionsOutsideArea",
                    f"{team_type}_KeepersDistanceActionsArea",
                    f"{team_type}_Keepers%Saved",
                    f"{team_type}_Keepers%CompletedPasses",
                    f"{team_type}_Keepers%CompletedPassesLaunched",
                    f"{team_type}_Keepers%Kicks",
                    f"{team_type}_Keepers%CrossesStopped",
                ]

                # Filtrar solo las columnas que existen
                available_cols = [
                    col for col in columns_to_keep_keepers if col in final_table.columns
                ]

                # Crear DataFrame con solo esas columnas
                filtered_df = final_table[available_cols].copy()

                # Renombrar columnas quitando el prefijo
                filtered_df.columns = filtered_df.columns.str.replace(
                    f"{team_type}_", ""
                )

                # Añadir columna team
                filtered_df["team"] = team_type

                filtered_df["competition_type"] = self.comp_type

                filtered_df["team_name"] = teams_data[team_type]["name"]

                filtered_df["Keepers"] = (
                    filtered_df["Keepers"]
                    .astype(str)
                    .str.strip()
                    .str.lower()
                    .str.normalize("NFKD")
                    .str.encode("ascii", "ignore")
                    .str.decode("utf-8")  # quita tildes
                    .str.replace(
                        r"[^a-z0-9]+", "_", regex=True
                    )  # cualquier separador -> "_"
                    .str.replace(r"_+", "_", regex=True)  # colapsa múltiples "_"
                    .str.strip("_")  # quita "_" al inicio/fin
                )

                # Guardar en el diccionario
                final_df_keepers[team_type] = filtered_df

                # Get existing columns para aggregaciones
                all_columns = set(final_table.columns)

                # Define aggregation columns
                columns_to_mean = [
                    f"{team_type}_KeepersAge",
                    f"{team_type}_Keepers%Saved",
                    f"{team_type}_Keepers%CompletedPassesLaunched",
                    f"{team_type}_Keepers%CompletedPasses",
                    f"{team_type}_KeepersPassesDistance",
                    f"{team_type}_Keepers%Kicks",
                    f"{team_type}_KeepersKicksDistance",
                    f"{team_type}_Keepers%CrossesStopped",
                    f"{team_type}_KeepersDistanceActionsArea",
                ]
                columns_to_mean = [col for col in columns_to_mean if col in all_columns]

                columns_to_sum = [
                    f"{team_type}_Keepers",
                    f"{team_type}_KeepersMinutes",
                    f"{team_type}_KeepersShotsOnTargetAgainst",
                    f"{team_type}_KeepersGoalsAgainst",
                    f"{team_type}_KeepersSaved",
                    f"{team_type}_KeepersxG",
                    f"{team_type}_KeepersPassesLaunched",
                    f"{team_type}_KeepersAttemptedPassesLaunched",
                    f"{team_type}_KeepersPasses",
                    f"{team_type}_KeepersAttemptedPasses",
                    f"{team_type}_KeepersAttemptedKicks",
                    f"{team_type}_KeepersCrosses",
                    f"{team_type}_KeepersCrossesStopped",
                    f"{team_type}_KeepersActionsOutsideArea",
                ]
                columns_to_sum = [col for col in columns_to_sum if col in all_columns]

                # Convert to numeric safely
                for col in columns_to_mean + columns_to_sum:
                    if col in final_table.columns:
                        try:
                            final_table[col] = pd.to_numeric(
                                final_table[col], errors="coerce"
                            )
                        except Exception as e:
                            print(f"Could not convert {col} to numeric: {e}")

                # Calculate aggregations
                mean_values = (
                    final_table[columns_to_mean].mean()
                    if columns_to_mean
                    else pd.Series()
                )
                sum_values = (
                    final_table[columns_to_sum].sum() if columns_to_sum else pd.Series()
                )

                # Store results
                result[f"{team_type}_KeepersCount"] = len(final_table)

                for col, val in mean_values.items():
                    result[col] = None if pd.isna(val) else float(val)

                for col, val in sum_values.items():
                    result[col] = None if pd.isna(val) else float(val)

            except Exception as e:
                print(f"Error processing {team_type} team keepers: {e}")
                print(traceback.format_exc())
                continue

        # FUERA DEL LOOP: Concatenar los DataFrames
        if len(final_df_keepers) == 2:  # Solo si tenemos ambos equipos
            combined_df_keepers = pd.concat(
                [final_df_keepers["home"], final_df_keepers["away"]],
                axis=0,
                ignore_index=True,
            )

            # Reordenar columnas para poner 'team' al principio
            cols = ["team"] + [
                col for col in combined_df_keepers.columns if col != "team"
            ]
            combined_df_keepers = combined_df_keepers[cols]
        else:
            print("No se pudieron obtener datos de ambos porteros")

        return result, combined_df_keepers

    # ===================== RUN ALL =====================

    def run_after(self) -> pd.DataFrame:
        """
        Main unified method: fetches matches, basic details, players and keepers.
        Returns everything as a single DataFrame without saving to CSV.
        """
        # ========== STEP 1: Build match skeleton ==========
        df = self._build_base_matches_df()
        
        # Sanity check
        if "link" not in df.columns:
            raise RuntimeError("Expected 'link' column in df.")

        # Base list of links (full)
        links = df["link"].astype(str).tolist()

        # ========== STEP 3: Initialize ID numbering ==========
        last_id = 0

        # ========== STEP 4: Process each match ==========
        processed_count = 0
        skipped_count = 0

        all_summerize_players_data = (
            []
        )  # List to accumulate player dicts (one per match)
        all_summerize_keepers_data = (
            []
        )  # List to accumulate keeper dicts (one per match)
        all_individual_players_data = (
            []
        )  # List to accumulate individual player DataFrames
        all_individual_keepers_data = (
            []
        )  # List to accumulate individual keeper DataFrames

        for idx, link in enumerate(links):

            # --- PART A: Basic match details ---
            try:
                df = self._enrich_one_match_page_no_csv(df, idx, link, last_id)
            except Exception as e:
                print(f"Error in basic details: {e}")
                skipped_count += 1
                all_summerize_players_data.append(
                    {}
                )  # Add empty dict to maintain alignment
                all_summerize_keepers_data.append(
                    {}
                )  # Add empty dict to maintain alignment
                all_individual_players_data.append(
                    pd.DataFrame()
                )  # Add empty DataFrame to maintain alignment
                all_individual_keepers_data.append(
                    pd.DataFrame()
                )  # Add empty DataFrame to
                continue

            _id = df.at[idx, "id"] if "id" in df.columns else None
            if pd.isna(_id) or _id is None:
                skipped_count += 1
                all_summerize_players_data.append(
                    {}
                )  # Add empty dict to maintain alignment
                all_summerize_keepers_data.append(
                    {}
                )  # Add empty dict to maintain alignment
                all_individual_players_data.append(
                    pd.DataFrame()
                )  # Add empty DataFrame to maintain alignment
                all_individual_keepers_data.append(
                    pd.DataFrame()
                )  # Add empty DataFrame to
                continue

            match_id = int(_id)
            last_id = max(last_id, match_id)

            # --- PART B: Players and keepers ---
            try:

                if not link or not str(link).startswith(("http://", "https://")):
                    raise ValueError(f"Invalid link: {link}")

                # Extract match information (teams_data and soup)
                teams_data, soup = self._extract_match_information(link)

                # Get player data for this match (returns dict)
                summerize_players_data, individual_players_data = self.get_players_data(
                    teams_data, soup
                )
                all_summerize_players_data.append(
                    summerize_players_data
                )  # Append to list
                all_individual_players_data.append(
                    individual_players_data
                )  # Append individual DataFrame

                # Get keeper data for this match (returns dict)
                summerize_keepers_data, individual_keepers_data = self.get_keepers_data(
                    match_id, teams_data, soup
                )
                all_summerize_keepers_data.append(
                    summerize_keepers_data
                )  # Append to list
                all_individual_keepers_data.append(
                    individual_keepers_data
                )  # Append individual DataFrame

                processed_count += 1

            except Exception as e:
                print(f"Error in players/keepers for ID {match_id}: {e}")
                print(traceback.format_exc())
                all_summerize_players_data.append({})  # Add empty dict on error
                all_summerize_keepers_data.append({})  # Add empty dict on error
                all_individual_players_data.append(
                    pd.DataFrame()
                )  # Add empty DataFrame on error
                all_individual_keepers_data.append(
                    pd.DataFrame()
                )  # Add empty DataFrame on error

            # Pause between matches (politeness)
            time.sleep(self.sleep_seconds)

        # ========== STEP 5: Merge all data ==========
        df_result = df.copy()

        # Merge player data (all_summerize_players_data is now a LIST of DataFrames or dicts)
        if len(all_summerize_players_data) > 0:
            # Filter out empty items (could be empty dicts, None, or empty DataFrames)
            valid_players = []
            for item in all_summerize_players_data:
                # Check if it's a DataFrame
                if isinstance(item, pd.DataFrame):
                    if not item.empty:
                        valid_players.append(item)
                # Check if it's a dict
                elif isinstance(item, dict):
                    if len(item) > 0:
                        non_id_keys = [k for k in item.keys() if k != "id"]
                        if non_id_keys:
                            valid_players.append(item)

            if valid_players:
                try:
                    # Check if we have DataFrames or dicts
                    if isinstance(valid_players[0], pd.DataFrame):
                        # Concatenate DataFrames
                        df_players = pd.concat(valid_players, ignore_index=True)
                    else:
                        # Create DataFrame from dicts
                        df_players = pd.DataFrame(valid_players)

                    # Verify length matches
                    if len(df_players) == len(df_result):
                        # OPTIMIZADO: Concatenar todas las columnas de una vez
                        cols_to_add = [col for col in df_players.columns if col != "id"]
                        if cols_to_add:
                            df_result = pd.concat(
                                [df_result, df_players[cols_to_add]], axis=1
                            )
                    else:
                        print(f"Length mismatch: {len(df_players)} vs {len(df_result)}")
                except Exception as e:
                    print(f"Error merging player data: {e}")
                    print(traceback.format_exc())
            else:
                print("No valid player data to merge")

        # Merge keeper data (all_summerize_keepers_data is now a LIST of DataFrames or dicts)
        if len(all_summerize_keepers_data) > 0:
            # Filter out empty items (could be empty dicts, None, or empty DataFrames)
            valid_keepers = []
            for item in all_summerize_keepers_data:
                # Check if it's a DataFrame
                if isinstance(item, pd.DataFrame):
                    if not item.empty:
                        valid_keepers.append(item)
                # Check if it's a dict
                elif isinstance(item, dict):
                    if len(item) > 0:
                        non_id_keys = [k for k in item.keys() if k != "id"]
                        if non_id_keys:
                            valid_keepers.append(item)

            if valid_keepers:
                try:
                    # Check if we have DataFrames or dicts
                    if isinstance(valid_keepers[0], pd.DataFrame):
                        # Concatenate DataFrames
                        df_keepers = pd.concat(valid_keepers, ignore_index=True)
                    else:
                        # Create DataFrame from dicts
                        df_keepers = pd.DataFrame(valid_keepers)

                    # Verify length matches
                    if len(df_keepers) == len(df_result):
                        # OPTIMIZADO: Concatenar todas las columnas de una vez
                        cols_to_add = [col for col in df_keepers.columns if col != "id"]
                        if cols_to_add:
                            df_result = pd.concat(
                                [df_result, df_keepers[cols_to_add]], axis=1
                            )
                    else:
                        print(f"Length mismatch: {len(df_keepers)} vs {len(df_result)}")
                except Exception as e:
                    print(f"Error merging keeper data: {e}")
                    print(traceback.format_exc())
            else:
                print("No valid keeper data to merge")

        # Remove internal columns
        columns_to_drop = [c for c in ["link", "id"] if c in df_result.columns]
        if columns_to_drop:
            df_result = df_result.drop(columns=columns_to_drop)

        # ========== STEP 6: Procesar porteros - Transferir tarjetas y limpiar ==========

        # Concatenar todos los DataFrames individuales de jugadores
        if all_individual_players_data:
            valid_player_dfs = [
                df for df in all_individual_players_data 
                if isinstance(df, pd.DataFrame) and not df.empty
            ]
            
            if valid_player_dfs:
                df_all_players = pd.concat(valid_player_dfs, ignore_index=True)
                
                # 1. Identificar porteros (GK) en el dataframe de jugadores
                if 'Position' in df_all_players.columns:
                    gk_mask = df_all_players['Position'] == 'GK'
                    df_gk_players = df_all_players[gk_mask].copy()
                    
                    # 2. Si hay porteros y tenemos datos de keepers
                    if not df_gk_players.empty and all_individual_keepers_data:
                        valid_keeper_dfs = [
                            df for df in all_individual_keepers_data 
                            if isinstance(df, pd.DataFrame) and not df.empty
                        ]
                        
                        if valid_keeper_dfs:
                            df_all_keepers = pd.concat(valid_keeper_dfs, ignore_index=True)
                            
                            # 3. Transferir tarjetas de jugadores GK a keepers
                            if 'Players' in df_all_keepers.columns and 'Players' in df_gk_players.columns:
                                # Crear columnas si no existen en keepers
                                if 'PlayersYellowCards' not in df_all_keepers.columns:
                                    df_all_keepers['PlayersYellowCards'] = 0
                                if 'PlayersRedCards' not in df_all_keepers.columns:
                                    df_all_keepers['PlayersRedCards'] = 0
                                
                                # Para cada portero en players, buscar en keepers y transferir tarjetas
                                transferred_count = 0
                                for idx, gk_row in df_gk_players.iterrows():
                                    player_name = gk_row['Players']
                                    yellow_cards = gk_row.get('PlayersYellowCards', 0)
                                    red_cards = gk_row.get('PlayersRedCards', 0)
                                    
                                    # Buscar coincidencias en keepers
                                    keeper_mask = df_all_keepers['Players'] == player_name
                                    if keeper_mask.any():
                                        df_all_keepers.loc[keeper_mask, 'PlayersYellowCards'] = yellow_cards
                                        df_all_keepers.loc[keeper_mask, 'PlayersRedCards'] = red_cards
                                        transferred_count += keeper_mask.sum()
                            
                            # 4. AÑADIR GAMEWEEK A KEEPERS CON NORMALIZACIÓN
                            
                            if 'gameweek' in df_result.columns:                                
                                # Crear diccionario con normalización de nombres
                                team_to_gameweek = {}
                                
                                if 'home_team_name' in df_result.columns and 'away_team_name' in df_result.columns:
                                    # Mapear equipos locales y visitantes
                                    for _, row in df_result.iterrows():
                                        gw = row['gameweek']
                                        if pd.notna(gw):
                                            # Home team
                                            if pd.notna(row['home_team_name']):
                                                home_original = row['home_team_name']
                                                home_normalized = normalize_name(home_original)
                                                team_to_gameweek[home_original] = gw
                                                team_to_gameweek[home_normalized] = gw
                                            
                                            # Away team
                                            if pd.notna(row['away_team_name']):
                                                away_original = row['away_team_name']
                                                away_normalized = normalize_name(away_original)
                                                team_to_gameweek[away_original] = gw
                                                team_to_gameweek[away_normalized] = gw
                                
                                # Verificar qué columna tiene el nombre del equipo en keepers
                                team_col_keepers = 'team_name'
                                
                                # Aplicar gameweek a keepers
                                if team_col_keepers and team_to_gameweek:
                                    # Crear columna temporal con nombres normalizados
                                    df_all_keepers['team_normalized'] = df_all_keepers[team_col_keepers].apply(normalize_name)
                                    
                                    # Intentar mapeo con nombre original
                                    df_all_keepers['gameweek'] = df_all_keepers[team_col_keepers].map(team_to_gameweek)
                                    
                                    # Para los que no encontraron match, intentar con nombre normalizado
                                    null_mask = df_all_keepers['gameweek'].isna()
                                    if null_mask.any():
                                        df_all_keepers.loc[null_mask, 'gameweek'] = df_all_keepers.loc[null_mask, 'team_normalized'].map(team_to_gameweek)
                                    
                                    # Limpiar columna temporal
                                    df_all_keepers.drop(columns=['team_normalized'], inplace=True)
                                else:
                                    print(f"ERROR: team_col_keepers='{team_col_keepers}', len(team_to_gameweek)={len(team_to_gameweek)}")
                            
                            # Actualizar all_individual_keepers_data con el dataframe modificado
                            all_individual_keepers_data = [df_all_keepers]
                        else:
                            print("No hay DataFrames válidos de keepers")
                    
                    # 5. Eliminar porteros del dataframe de jugadores
                    df_all_players = df_all_players[~gk_mask].reset_index(drop=True)
                    
                    # 6. AÑADIR GAMEWEEK A JUGADORES CON NORMALIZACIÓN
                    
                    if 'gameweek' in df_result.columns:
                        # Crear diccionario con normalización de nombres
                        team_to_gameweek = {}
                        
                        if 'home_team_name' in df_result.columns and 'away_team_name' in df_result.columns:
                            # Mapear equipos
                            for _, row in df_result.iterrows():
                                gw = row['gameweek']
                                if pd.notna(gw):
                                    # Home team
                                    if pd.notna(row['home_team_name']):
                                        home_original = row['home_team_name']
                                        home_normalized = normalize_name(home_original)
                                        team_to_gameweek[home_original] = gw
                                        team_to_gameweek[home_normalized] = gw
                                    
                                    # Away team
                                    if pd.notna(row['away_team_name']):
                                        away_original = row['away_team_name']
                                        away_normalized = normalize_name(away_original)
                                        team_to_gameweek[away_original] = gw
                                        team_to_gameweek[away_normalized] = gw
                                                    
                        # Verificar qué columna tiene el nombre del equipo en players
                        team_col_players = 'team_name'
                        
                        # Aplicar gameweek a jugadores
                        if team_col_players and team_to_gameweek:
                            # Crear columna temporal con nombres normalizados
                            df_all_players['team_normalized'] = df_all_players[team_col_players].apply(normalize_name)
                            
                            # Intentar mapeo con nombre original
                            df_all_players['gameweek'] = df_all_players[team_col_players].map(team_to_gameweek)
                            
                            # Para los que no encontraron match, intentar con nombre normalizado
                            null_mask = df_all_players['gameweek'].isna()
                            if null_mask.any():
                                df_all_players.loc[null_mask, 'gameweek'] = df_all_players.loc[null_mask, 'team_normalized'].map(team_to_gameweek)
                            
                            # Limpiar columna temporal
                            df_all_players.drop(columns=['team_normalized'], inplace=True)
                        else:
                            print(f"ERROR: team_col_players='{team_col_players}', len(team_to_gameweek)={len(team_to_gameweek)}")
                    
                    # Actualizar all_individual_players_data
                    all_individual_players_data = [df_all_players]
                else:
                    print("Columna 'Position' no encontrada en players DataFrame")
                    all_individual_players_data = [df_all_players]
            else:
                print("No hay DataFrames válidos de jugadores")

        # ========== STEP 7: Final summary ==========
        return df_result, all_individual_players_data, all_individual_keepers_data

    def run_before(self) -> pd.DataFrame:
        # ========== STEP 1: Build match skeleton ==========
        df = self._build_base_matches_df()

        # Sanity check
        if "link" not in df.columns:
            raise RuntimeError("Expected 'link' column in df.")

        # Base list of links (full)
        links = df["link"].astype(str).tolist()

        # ========== STEP 3: Initialize ID numbering ==========
        last_id = 0

        # ========== STEP 4: Process each match ==========
        processed_count = 0
        skipped_count = 0

        for idx, link in enumerate(links):

            # --- PART A: Basic match details ---
            try:
                df = self._enrich_one_match_page_no_csv(df, idx, link, last_id)
            except Exception as e:
                print(f"Error in basic details: {e}")
                skipped_count += 1
                continue

            _id = df.at[idx, "id"] if "id" in df.columns else None
            if pd.isna(_id) or _id is None:
                skipped_count += 1
                continue

            match_id = int(_id)
            last_id = max(last_id, match_id)

            processed_count += 1

            # Pause between matches (politeness)
            time.sleep(self.sleep_seconds)

        # ========== STEP 5: Merge all data ==========
        df_result = df.copy()

        # Remove internal columns
        columns_to_drop = [c for c in ["link", "id"] if c in df_result.columns]
        if columns_to_drop:
            df_result = df_result.drop(columns=columns_to_drop)

        df_result["competition_type"] = self.comp_type
        df_result["country"] = self.country

        # ========== STEP 7: Final summary ==========
        return df_result

    def scrape_league_standings(
        self, 
        ranking_csv_path: str,
        recent_matches_df: pd.DataFrame,
        gameweek: int,
        verbose: bool = False
    ) -> pd.DataFrame:
        """
        Scrape league standings, save to CSV, and enrich matches DataFrame with team stats.
        
        Parameters:
        -----------
        ranking_csv_path : str
            Path to CSV file where standings will be appended
        recent_matches_df : pd.DataFrame
            DataFrame with recent matches to enrich with standings data
        gameweek : int
            Current gameweek number
        verbose : bool
            Print debugging information
            
        Returns:
        --------
        pd.DataFrame
            Enriched matches DataFrame with team rankings and stats
        """
        
        # Format URL with last_season
        url = self.url_ranking
        
        if verbose:
            print(f"[STANDINGS] Scraping standings for gameweek {gameweek}")
            print(f"[STANDINGS] Formatted URL: {url}")
        
        # Fetch the page with headless scraper
        soup = get_soup(
            url=url,
            human_behavior=True,
            timeout_ms=60000,
            ensure_server=True,
            max_retries=3,
            verbose=verbose
        )
        
        # Find the standings table
        table = soup.find('table', {'class': 'stats_table'})
        
        if not table:
            raise ValueError("Could not find standings table on page")
        
        if verbose:
            print("[STANDINGS] Table found, extracting data...")
        
        # Extract rows
        rows_data = []
        tbody = table.find('tbody')
        if tbody:
            for row in tbody.find_all('tr'):
                # Skip header rows inside tbody
                if 'thead' in row.get('class', []):
                    continue
                    
                row_data = {}
                
                # Extract rank
                rank_th = row.find('th', {'data-stat': 'rank'})
                if rank_th:
                    row_data['Rk'] = rank_th.text.strip()
                
                # Extract all td cells
                for td in row.find_all('td'):
                    data_stat = td.get('data-stat', '')
                    
                    # Get team name
                    if data_stat == 'team':
                        team_link = td.find('a')
                        if team_link:
                            row_data['Squad'] = team_link.text.strip()
                        else:
                            row_data['Squad'] = td.text.strip()
                    
                    # Get numeric data
                    else:
                        row_data[data_stat] = td.text.strip()
                
                if row_data:
                    rows_data.append(row_data)
        
        # Create raw DataFrame
        df_raw = pd.DataFrame(rows_data)
        
        if verbose:
            print(f"[STANDINGS] Extracted {len(df_raw)} teams")
        
        # Transform to standardized format
        df_standings = pd.DataFrame({
            'gameweek': gameweek,
            'team_rank': pd.to_numeric(df_raw['Rk'], errors='coerce').astype(int),
            'team_name': df_raw['Squad'].apply(normalize_name),
            'matchs_played': pd.to_numeric(df_raw['games'], errors='coerce').astype(int),
            'matchs_won': pd.to_numeric(df_raw['wins'], errors='coerce').astype(int),
            'matchs_drawn': pd.to_numeric(df_raw['ties'], errors='coerce').astype(int),
            'matchs_lost': pd.to_numeric(df_raw['losses'], errors='coerce').astype(int),
            'team_goals_for': pd.to_numeric(df_raw['goals_for'], errors='coerce').astype(int),
            'team_goals_against': pd.to_numeric(df_raw['goals_against'], errors='coerce').astype(int),
            'team_goals_difference': pd.to_numeric(df_raw['goal_diff'], errors='coerce').astype(int),
            'team_points': pd.to_numeric(df_raw['points'], errors='coerce').astype(int)
        })
        
        # Sort by rank
        df_standings = df_standings.sort_values('team_rank').reset_index(drop=True)
        
        if verbose:
            print(f"[STANDINGS] Transformation complete. Shape: {df_standings.shape}")
        
        # Save to CSV (append mode)
        from pathlib import Path
        csv_path = Path(ranking_csv_path)
        
        if csv_path.exists():
            # Append to existing file
            df_standings.to_csv(csv_path, mode='a', header=False, index=False)
            if verbose:
                print(f"[STANDINGS] Appended to existing CSV: {ranking_csv_path}")
        else:
            # Create new file with header
            df_standings.to_csv(csv_path, mode='w', header=True, index=False)
            if verbose:
                print(f"[STANDINGS] Created new CSV: {ranking_csv_path}")
        
        # Enrich matches DataFrame with standings data
        df_matches_enriched = recent_matches_df.copy()
        
        # Merge home team stats
        df_matches_enriched = df_matches_enriched.merge(
            df_standings[['team_name', 'team_rank', 'team_points', 'team_goals_for', 
                        'team_goals_against', 'team_goals_difference']],
            left_on='home_team',
            right_on='team_name',
            how='left',
            suffixes=('', '_home')
        )
        
        # Rename home team columns
        df_matches_enriched.rename(columns={
            'team_rank': 'home_team_rank',
            'team_points': 'home_team_points',
            'team_goals_for': 'home_team_goals_for',
            'team_goals_against': 'home_team_goals_against',
            'team_goals_difference': 'home_team_goals_difference'
        }, inplace=True)
        
        # Drop temporary column
        df_matches_enriched.drop(columns=['team_name'], inplace=True, errors='ignore')
        
        # Merge away team stats
        df_matches_enriched = df_matches_enriched.merge(
            df_standings[['team_name', 'team_rank', 'team_points', 'team_goals_for', 
                        'team_goals_against', 'team_goals_difference']],
            left_on='away_team',
            right_on='team_name',
            how='left',
            suffixes=('', '_away')
        )
        
        # Rename away team columns
        df_matches_enriched.rename(columns={
            'team_rank': 'away_team_rank',
            'team_points': 'away_team_points',
            'team_goals_for': 'away_team_goals_for',
            'team_goals_against': 'away_team_goals_against',
            'team_goals_difference': 'away_team_goals_difference'
        }, inplace=True)
        
        # Drop temporary column
        df_matches_enriched.drop(columns=['team_name'], inplace=True, errors='ignore')
        
        if verbose:
            print(f"[STANDINGS] Enriched matches DataFrame. Shape: {df_matches_enriched.shape}")
            print(f"[STANDINGS] Added columns: home/away_team_rank, points, goals_for, goals_against, goals_difference")
        
        return df_matches_enriched