"""Post-processing utilities for cleaning LLM conversion results."""

from typing import Callable, List, Optional


class ConversionResultCleanHelper:
    """
    Helper class to clean and extract Python code blocks from text generated by LLMs.

    The cleaning logic removes Markdown code fences (```python ... ```) so that
    only the code itself remains.
    """

    # Opening fence of a Markdown code block tagged as Python.
    TRIPLE_BACK_QUOTES_WITH_PYTHON = "```python"
    # Bare Markdown fence; used to locate the closing fence of a block.
    TRIPLE_BACK_QUOTES = "```"

    def get_udf_functions(self) -> List[Callable[[Optional[str]], Optional[str]]]:
        """
        Returns a list of UDF functions for use in PySpark.

        Returns:
            A list containing the bound cleaning method(s) of this helper.
            Each callable takes the text to clean and returns the cleaned text
            (or None when given None).
        """
        return [self.clean_python_code_blocks]

    def clean_python_code_blocks(self, text: Optional[str]) -> Optional[str]:
        """
        Cleans Python code blocks with two simple rules:
        1. If text starts with ```python and has a closing ```, extract the code between them
        2. If ```python appears in the middle of text, simply remove it

        When the text starts with ```python but no closing ``` follows, rule 2
        is applied as a fallback (every ```python marker is removed and the
        rest of the text is left untouched).

        Args:
            text: Input text containing Python code blocks. May be None.

        Returns:
            Cleaned code. None is returned for None input, and text without
            any ```python marker is returned unchanged. Code extracted by
            rule 1 has the single line break following the opening fence
            removed (either "\\n" or "\\r\\n") and, when non-empty, always ends
            with a newline.
        """
        if text is None:
            return None

        opening_fence = self.TRIPLE_BACK_QUOTES_WITH_PYTHON

        # If no ```python exists, return the original text
        if opening_fence not in text:
            return text

        # Check if text starts with ```python (allowing for whitespace at the beginning)
        if text.lstrip().startswith(opening_fence):
            # Case 1: extract whatever sits between the first ```python and the
            # next ```. partition() yields an empty separator when the closing
            # fence is missing, which sends us to the fallback below.
            _, _, after_opening = text.partition(opening_fence)
            code, closing_fence, _ = after_opening.partition(self.TRIPLE_BACK_QUOTES)
            if closing_fence:
                # Drop the single line break that follows the opening fence.
                # Windows-style "\r\n" is handled too so that no stray "\r\n"
                # is left at the start of the extracted code.
                if code.startswith("\r\n"):
                    code = code[2:]
                elif code.startswith("\n"):
                    code = code[1:]
                # Ensure it ends with a newline
                if code and not code.endswith("\n"):
                    code += "\n"
                return code

        # Case 2: ```python appears in the middle (or has no closing fence) -
        # simply remove every occurrence of the marker.
        return text.replace(opening_fence, "")
