# Copyright (c) 2023, Bas Nijholt
# All rights reserved.
"""Markdown Code Runner.

Automatically update Markdown files with code block output.

This script is part of the 'markdown-code-runner' package available on GitHub:
https://github.com/basnijholt/markdown-code-runner

Add code blocks between <!-- CODE:START --> and <!-- CODE:END --> in your Markdown file.
The output will be inserted between <!-- OUTPUT:START --> and <!-- OUTPUT:END -->.

Example:
-------
<!-- CODE:START -->
<!-- print('Hello, world!') -->
<!-- CODE:END -->
<!-- OUTPUT:START -->
This will be replaced by the output of the code block above.
<!-- OUTPUT:END -->

Alternatively, you can add a <!-- CODE:SKIP --> comment above a code block to skip execution.

Another way is to run code blocks in triple backticks:
```python markdown-code-runner
print('Hello, world!')
```
Which will print the output of the code block between the output markers:
<!-- OUTPUT:START -->
This will be replaced by the output of the code block above.
<!-- OUTPUT:END -->

You can also run bash code blocks:
```bash markdown-code-runner
echo "Hello, world!"
```
Which will similarly print the output of the code block between the next output markers.

"""

from __future__ import annotations

import argparse
import contextlib
import io
import os
import re
import subprocess
from dataclasses import dataclass, field
from importlib.metadata import PackageNotFoundError, version
from pathlib import Path
from typing import Any, Literal

# Look up the version of the installed "markdown-code-runner" distribution;
# fall back to a placeholder string when no such distribution is installed.
try:
    __version__ = version("markdown-code-runner")
except PackageNotFoundError:  # pragma: no cover
    __version__ = "unknown"

# Debug switch: enabled only when the environment variable DEBUG is exactly "1".
# When enabled, `is_marker` prints every marker it recognizes.
DEBUG: bool = os.environ.get("DEBUG", "0") == "1"


def md_comment(text: str) -> str:
    """Wrap *text* in Markdown (HTML) comment delimiters.

    The text is separated from the opening and closing delimiters by a
    single space on each side.
    """
    opening, closing = "<!--", "-->"
    return f"{opening} {text} {closing}"


# Marker definitions, keyed by role. The values are not matched literally:
# `markers_to_patterns` compiles each one as a regular expression, which is why
# the backtick start marker can contain a named `language` group.
# Keys ending in ":start" are the ones `ProcessingState` scans for when looking
# for the beginning of a new section.
MARKERS = {
    # Inserted right after the output start marker when output is regenerated.
    "warning": md_comment(
        "⚠️ This content is auto-generated by `markdown-code-runner`.",
    ),
    # Placed before a code block to keep its existing output untouched.
    "skip": md_comment("CODE:SKIP"),
    # Code hidden inside Markdown comments (Python and Bash variants).
    "code:comment:python:start": md_comment("CODE:START"),
    "code:comment:bash:start": md_comment("CODE:BASH:START"),
    "code:comment:end": md_comment("CODE:END"),
    # Delimiters of the region that receives the code output.
    "output:start": md_comment("OUTPUT:START"),
    "output:end": md_comment("OUTPUT:END"),
    # Fenced code blocks: "```<language> markdown-code-runner" ... "```".
    "code:backticks:start": r"```(?P<language>\w+)\smarkdown-code-runner",
    "code:backticks:end": "```",
}


def markers_to_patterns() -> dict[str, re.Pattern]:
    """Compile every entry of ``MARKERS`` into a regular expression.

    Each marker is prefixed with a line-start anchor and a named ``spaces``
    group that captures any leading whitespace, so a marker is only
    recognized at the beginning of a (possibly indented) line. The returned
    dictionary uses the same keys as ``MARKERS``.
    """
    leading_whitespace = r"^(?P<spaces>\s*)"
    return {
        name: re.compile(leading_whitespace + marker, re.MULTILINE)
        for name, marker in MARKERS.items()
    }


# Compiled once at import time; `is_marker` looks patterns up here by marker key.
PATTERNS = markers_to_patterns()


def is_marker(line: str, marker: str) -> re.Match | None:
    """Search *line* for the marker registered under the key *marker*.

    Returns the match object when the compiled pattern from ``PATTERNS``
    is found, otherwise ``None``. With ``DEBUG`` enabled, every successful
    match is reported on stdout.
    """
    found = PATTERNS[marker].search(line)
    if found is not None and DEBUG:  # pragma: no cover
        print(f"Found marker {marker} in line {line}")
    return found


def remove_md_comment(commented_text: str) -> str:
    """Return the text inside a ``<!-- ... -->`` Markdown comment.

    Surrounding whitespace is ignored. The comment must start with
    ``<!--`` followed by a space and end with a space followed by ``-->``;
    anything else raises ``ValueError``.
    """
    opener, closer = "<!-- ", " -->"
    stripped = commented_text.strip()
    if stripped.startswith(opener) and stripped.endswith(closer):
        return stripped[len(opener) : -len(closer)]
    msg = f"Invalid Markdown comment format: {stripped}"
    raise ValueError(msg)


def execute_code(
    code: list[str],
    context: dict[str, Any] | None = None,
    language: Literal["python", "bash"] | None = None,  # type: ignore[name-defined]
    *,
    output_file: str | Path | None = None,
    verbose: bool = False,
) -> list[str]:
    """Execute a code block and return its output as a list of strings.

    Parameters
    ----------
    code
        The lines of the code block, without line terminators.
    context
        Globals dictionary used when executing Python code. It is mutated
        by the executed code, so passing the same dictionary to successive
        calls lets later blocks see names defined by earlier ones.
        A fresh dictionary is used when omitted.
    language
        ``"python"`` runs the code with ``exec`` and captures stdout;
        ``"bash"`` runs it through the system shell and captures stdout
        (stderr and the exit status are ignored).
    output_file
        When given, the code is *not* executed. It is written verbatim to
        this path (UTF-8 encoded) and an empty output list is returned.
        This is the only supported mode for other languages.
    verbose
        If True, print the code before running it and the output afterwards.

    Returns
    -------
    list[str]
        The captured stdout split on newlines (a trailing newline therefore
        yields a final empty string), or ``[]`` when ``output_file`` is used.

    Raises
    ------
    ValueError
        If ``language`` is neither ``"python"`` nor ``"bash"`` and no
        ``output_file`` was supplied.

    """
    if context is None:
        context = {}
    full_code = "\n".join(code)

    if verbose:
        print(_bold(f"\nExecuting code {language} block:"))
        print(f"\n{full_code}\n")

    output: list[str]
    if output_file is not None:
        # Explicit UTF-8 keeps non-ASCII source intact regardless of the
        # platform's locale encoding.
        Path(output_file).write_text(full_code, encoding="utf-8")
        output = []
    elif language == "python":
        # NOTE: the code is executed as-is; only run documents you trust.
        with io.StringIO() as captured, contextlib.redirect_stdout(captured):
            exec(full_code, context)  # noqa: S102
            output = captured.getvalue().split("\n")
    elif language == "bash":
        # check=False: a failing command still contributes whatever it printed.
        result = subprocess.run(  # noqa: S602
            full_code,
            capture_output=True,
            text=True,
            shell=True,
            check=False,
        )
        output = result.stdout.split("\n")
    else:
        msg = "Specify 'output_file' for non-Python/Bash languages."
        raise ValueError(msg)

    if verbose:
        print(_bold("Output:"))
        print(f"\n{output}\n")

    return output


def _bold(text: str) -> str:
    """Wrap *text* in ANSI escape sequences that switch bold on and off."""
    # "\033[1m" enables bold; "\033[0m" resets all text attributes.
    return f"\033[1m{text}\033[0m"


def standardize_code_fences(content: str) -> str:
    """Strip markdown-code-runner modifiers from all code fence language identifiers.

    This is useful for making markdown files compatible with standard markdown
    processors like mkdocs and pandoc, which don't understand the
    ``python markdown-code-runner`` syntax.

    Only the opening fence line itself is rewritten: the ``markdown-code-runner``
    modifier, any ``key=value`` options that follow it on the same line, and
    trailing whitespace on that line are removed. The lines of the code block
    are never touched.

    Parameters
    ----------
    content
        The markdown content as a string.

    Returns
    -------
    str
        The content with all code fence modifiers stripped.

    Examples
    --------
    >>> text = '''```python markdown-code-runner
    ... print("hello")
    ... ```'''
    >>> print(standardize_code_fences(text))
    ```python
    print("hello")
    ```

    """
    # `[^\S\n]` is "whitespace except newline". Plain `\s` also matches line
    # breaks, which would let the pattern run into the code block and swallow
    # a first line such as `x=1` as if it were an option, or eat a blank line
    # that directly follows the fence.
    return re.sub(
        r"^(```\w+)[^\S\n]+markdown-code-runner(?:[^\S\n]+\S+=\S+)*[^\S\n]*$",
        r"\1",
        content,
        flags=re.MULTILINE,
    )


def _extract_backtick_options(line: str) -> dict[str, str]:
    """Parse the language and ``key=value`` options from a code fence line.

    Returns an empty dictionary when *line* contains no fence of the form
    three backticks followed by a language name. Otherwise the result
    always holds the ``"language"`` key. ``key=value`` pairs found after
    the language are added only when the line also mentions
    ``markdown-code-runner``; a later pair overrides an earlier entry with
    the same key.
    """
    fence = re.search(r"```(?P<language>\w+)", line)
    if fence is None:
        return {}

    options = {"language": fence.group("language")}
    if "markdown-code-runner" not in line:
        return options

    # Everything after the language identifier may carry options.
    remainder = line[fence.end() :]
    options.update(
        (pair.group("key"), pair.group("value"))
        for pair in re.finditer(r"(?P<key>\w+)=(?P<value>\S+)", remainder)
    )
    return options


@dataclass
class ProcessingState:
    """State of the processing of a Markdown file.

    Lines are fed one at a time to `process_line`. The instance tracks which
    section of the document it is in, collects code lines, executes them when
    the closing marker is reached, and builds the rewritten document in
    `new_lines`.
    """

    # Section the processor is currently inside of.
    section: Literal[
        "normal",
        "output",
        # code:comment stores language in `section`
        "code:comment:python",
        "code:comment:bash",
        # code:backticks store language in `backtick_options`
        "code:backticks",
    ] = "normal"
    # Lines of the code block that is currently being collected.
    code: list[str] = field(default_factory=list)
    # Existing lines of the output section; re-emitted when the block is skipped.
    original_output: list[str] = field(default_factory=list)
    # Passed to `execute_code` as `context` for every block, so it is shared
    # between all code blocks processed by this instance.
    context: dict[str, Any] = field(default_factory=dict)
    # Set by the skip marker; cleared again at the end of the next output section.
    skip_code_block: bool = False
    # Result of the most recently executed code block, or None if none is pending.
    output: list[str] | None = None
    # The rewritten document, one entry per line.
    new_lines: list[str] = field(default_factory=list)
    # Options parsed from the opening line of the current block
    # (see `_extract_backtick_options`).
    backtick_options: dict[str, Any] = field(default_factory=dict)
    # If True, the modifier is removed from opening backtick fences in the result.
    backtick_standardize: bool = True

    def process_line(self, line: str, *, verbose: bool = False) -> None:
        """Process a line of the Markdown file.

        The skip and output markers are checked first, so they take effect
        regardless of the current section. Otherwise the line is dispatched
        according to `self.section`. Unless the processor is inside an output
        section afterwards, the (possibly rewritten) line is appended to
        `new_lines`.
        """
        if is_marker(line, "skip"):
            self.skip_code_block = True
        elif is_marker(line, "output:start"):
            self._process_output_start(line)
        elif is_marker(line, "output:end"):
            self._process_output_end()
        elif self.section.startswith("code:comment"):
            self._process_comment_code(line, verbose=verbose)
        elif self.section.startswith("code:backticks"):
            self._process_backtick_code(line, verbose=verbose)
        elif self.section == "output":
            # Old output is kept aside; it is only restored for skipped blocks.
            self.original_output.append(line)
        else:
            processed_line = self._process_start_markers(line, verbose=verbose)
            if processed_line is not None:
                line = processed_line

        # Lines inside an output section are not copied: they are either
        # replaced by fresh output or restored in `_process_output_end`.
        if self.section != "output":
            self.new_lines.append(line)

    def _process_start_markers(
        self,
        line: str,
        verbose: bool = False,  # noqa: FBT001, FBT002, ARG002
    ) -> str | None:
        """Enter a new section if `line` matches one of the ":start" markers.

        Returns the line to emit (with the ``markdown-code-runner`` modifier
        removed from a backtick fence when standardization is enabled), or
        None when the line matches no start marker. `verbose` is unused.
        """
        for marker_name in MARKERS:
            if marker_name.endswith(":start") and is_marker(line, marker_name):
                # reset output in case previous output wasn't displayed
                self.output = None
                self.backtick_options = _extract_backtick_options(line)
                # Drop the trailing ":start" to obtain the section name.
                self.section, _ = marker_name.rsplit(":", 1)  # type: ignore[assignment]

                # Standardize backticks if needed
                if (
                    marker_name == "code:backticks:start"
                    and self.backtick_standardize
                    and "markdown-code-runner" in line
                ):
                    return re.sub(r"\smarkdown-code-runner.*", "", line)
                return line
        return None

    def _process_output_start(self, line: str) -> None:
        """Handle the output start marker.

        For an executed block, emits the marker line, the warning comment and
        the new output. For a skipped block, starts collecting the existing
        output section so it can be restored unchanged.
        """
        self.section = "output"
        if not self.skip_code_block:
            # An output section must be preceded by an executed code block.
            assert isinstance(
                self.output,
                list,
            ), f"Output must be a list, not {type(self.output)}, line: {line}"
            # Trim trailing whitespace from output lines
            # (the comprehension variable `line` is local to the comprehension
            # and does not rebind the `line` parameter used on the next line).
            trimmed_output = [line.rstrip() for line in self.output]
            self.new_lines.extend([line, MARKERS["warning"], *trimmed_output])
        else:
            self.original_output.append(line)

    def _process_output_end(self) -> None:
        """Handle the output end marker and reset the per-block output state.

        For a skipped block the collected original output is written back
        and the skip flag is cleared.
        """
        self.section = "normal"
        if self.skip_code_block:
            self.new_lines.extend(self.original_output)
            self.skip_code_block = False
        self.original_output = []
        self.output = None  # Reset output after processing end of the output section

    def _process_code(
        self,
        line: str,
        end_marker: str,
        language: Literal["python", "bash"],
        *,
        remove_comment: bool = False,
        verbose: bool,
    ) -> None:
        """Collect a code line, or run the collected code at the end marker.

        When `line` matches `end_marker`, the collected code is executed
        (unless the block is skipped) and the code-related state is reset.
        Otherwise the line is added to `self.code`, with the Markdown comment
        wrapper removed first when `remove_comment` is True.
        """
        if is_marker(line, end_marker):
            if not self.skip_code_block:
                self.output = execute_code(
                    self.code,
                    self.context,
                    language,
                    # A `filename=...` option makes `execute_code` write the
                    # code to that file instead of running it.
                    output_file=self.backtick_options.get("filename"),
                    verbose=verbose,
                )
            self.section = "normal"
            self.code = []
            self.backtick_options = {}
        else:
            self.code.append(remove_md_comment(line) if remove_comment else line)

    def _process_comment_code(self, line: str, *, verbose: bool) -> None:
        """Process a line inside a comment-style code block."""
        # The language is the last component of the section name.
        _, language = self.section.rsplit(":", 1)
        self._process_code(
            line,
            "code:comment:end",
            language,  # type: ignore[arg-type]
            remove_comment=True,
            verbose=verbose,
        )

    def _process_backtick_code(self, line: str, *, verbose: bool) -> None:
        """Process a line inside a backtick-fenced code block."""
        # All end backticks markers are the same
        language = self.backtick_options["language"]
        self._process_code(line, "code:backticks:end", language, verbose=verbose)


def process_markdown(
    content: list[str],
    *,
    verbose: bool = False,
    backtick_standardize: bool = True,
    execute: bool = True,
) -> list[str]:
    """Run the code blocks found in Markdown lines and insert their output.

    Parameters
    ----------
    content
        The Markdown document as a list of lines.
    verbose
        If True, echo each line (with its index) while it is processed.
    backtick_standardize
        If True, remove the markdown-code-runner modifier from the opening
        fence of backtick code blocks.
    execute
        If True, execute code blocks and refresh the output sections.
        If False, hand back ``content`` itself without processing.

    Returns
    -------
    list[str]
        The processed lines, or the unchanged input list when ``execute``
        is False.

    """
    assert isinstance(content, list), "Input must be a list"
    if execute:
        processor = ProcessingState(backtick_standardize=backtick_standardize)
        for index, text in enumerate(content):
            if verbose:
                label = _bold(f"line {index:4d}")
                print(f"{label}: {text}")
            processor.process_line(text, verbose=verbose)
        return processor.new_lines
    return content


def update_markdown_file(  # noqa: PLR0913
    input_filepath: Path | str,
    output_filepath: Path | str | None = None,
    *,
    verbose: bool = False,
    backtick_standardize: bool = True,
    execute: bool = True,
    standardize: bool = False,
) -> None:
    """Rewrite a Markdown file by executing and updating code blocks.

    The input is read and the result written as UTF-8. The written content
    always ends with exactly one newline; other trailing whitespace at the
    end of the document is removed.

    Parameters
    ----------
    input_filepath : Path | str
        Path to the input Markdown file.
    output_filepath : Path | str | None
        Path to the output Markdown file. If None, overwrites input file.
    verbose : bool
        If True, print every line that is processed.
    backtick_standardize : bool
        If True, clean up markdown-code-runner string from executed backtick code blocks.
    execute : bool
        If True, execute code blocks and update output sections.
        If False, skip code execution (useful with standardize=True).
    standardize : bool
        If True, post-process to standardize ALL code fences in the output,
        removing ``markdown-code-runner`` modifiers. This is useful for
        compatibility with markdown processors like mkdocs and pandoc.

    """
    input_filepath = Path(input_filepath)
    # Resolve the destination up front so the progress message below names
    # the real target instead of "None" when overwriting in place.
    output_filepath = (
        input_filepath if output_filepath is None else Path(output_filepath)
    )

    # Explicit UTF-8: the generated warning marker contains a non-ASCII
    # character, which locale encodings such as cp1252 cannot represent.
    with input_filepath.open(encoding="utf-8") as f:
        original_lines = [line.rstrip("\n") for line in f]
    if verbose:
        print(f"Processing input file: {input_filepath}")
    new_lines = process_markdown(
        original_lines,
        verbose=verbose,
        backtick_standardize=backtick_standardize,
        execute=execute,
    )
    updated_content = "\n".join(new_lines).rstrip() + "\n"

    # Post-process to standardize all code fences if requested
    if standardize:
        if verbose:
            print("Standardizing all code fences...")
        updated_content = standardize_code_fences(updated_content)

    if verbose:
        print(f"Writing output to: {output_filepath}")
    with output_filepath.open("w", encoding="utf-8") as f:
        f.write(updated_content)
    if verbose:
        print("Done!")


def main() -> None:
    """Command-line entry point.

    Builds the argument parser, reads the command line and forwards the
    resulting settings to ``update_markdown_file``.
    """
    cli = argparse.ArgumentParser(
        description="Automatically update Markdown files with code block output.",
    )
    cli.add_argument("input", type=str, help="Path to the input Markdown file.")
    cli.add_argument(
        "-o",
        "--output",
        default=None,
        type=str,
        help="Path to the output Markdown file. (default: overwrite input file)",
    )
    cli.add_argument(
        "-d",
        "--verbose",
        action="store_true",
        help="Enable debugging mode (default: False)",
    )
    cli.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )

    # The remaining switches are plain on/off flags that default to off.
    boolean_flags = (
        (
            ("--no-backtick-standardize",),
            "Disable backtick standardization (default: enabled for separate output files, disabled for in-place)",
        ),
        (
            ("-s", "--standardize"),
            "Post-process to standardize ALL code fences, removing 'markdown-code-runner' modifiers",
        ),
        (
            ("-n", "--no-execute"),
            "Skip code execution entirely (useful with --standardize for compatibility processing only)",
        ),
    )
    for option_strings, help_text in boolean_flags:
        cli.add_argument(
            *option_strings,
            action="store_true",
            help=help_text,
            default=False,
        )

    options = cli.parse_args()

    source = Path(options.input)
    destination = source if options.output is None else Path(options.output)

    # Fences are only standardized when writing to a separate output file,
    # and never when the user explicitly opted out.
    strip_fence_modifier = (
        options.output is not None and not options.no_backtick_standardize
    )

    update_markdown_file(
        source,
        destination,
        verbose=options.verbose,
        backtick_standardize=strip_fence_modifier,
        execute=not options.no_execute,
        standardize=options.standardize,
    )


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
