import os
import re
from collections.abc import Callable

from misc_python_utils.beartypes import Directory
from misc_python_utils.processing_utils.processing_utils import exec_command


def download_data(
    base_url: str,
    file_name: str,
    data_dir: Directory,
    verbose: bool = False,
    unzip_it: bool = False,
    do_raise: bool = True,
    remove_zipped: bool = False,
) -> str | None:
    """Download ``base_url/file_name`` into ``data_dir`` and optionally unpack it.

    With ``unzip_it`` the archive is unpacked into a folder named like the
    downloaded file minus its archive suffix. Download and extraction are
    skipped entirely when that folder already exists. Without ``unzip_it``
    the file is only downloaded, and only if it is not already present.

    Args:
        base_url: URL prefix; ``"/" + file_name`` is appended to it.
        file_name: Name of the remote file, also used as the local file name.
        data_dir: Local directory the file is stored in.
        verbose: Forwarded to ``wget_file``.
        unzip_it: Unpack the downloaded archive.
        do_raise: Re-raise a ``FileNotFoundError`` instead of suppressing it.
        remove_zipped: Delete the archive after it has been unpacked.

    Returns:
        The extraction folder when ``unzip_it`` is true, otherwise ``None``.
        ``None`` is also returned when a ``FileNotFoundError`` was suppressed
        because ``do_raise`` is false.

    Raises:
        FileNotFoundError: Re-raised from the download/extract steps when
            ``do_raise`` is true.
        AssertionError: If ``unzip_it`` is set and ``file_name`` does not end
            in one of the supported archive suffixes.
    """
    url = base_url + "/" + file_name
    file = data_dir + "/" + file_name

    try:
        if unzip_it:
            # Longer suffixes come first so ".tar.gz" wins over ".gz".
            suffixes = [".zip", ".ZIP", ".tar.gz", ".tgz", ".gz", ".GZ", ".tar", ".TAR"]
            # Escape the dots and anchor at the end of the string: only a
            # trailing archive suffix may be stripped, never a look-alike
            # substring somewhere in the directory part of the path.
            suffix_pattern = "(?:" + "|".join(re.escape(s) for s in suffixes) + r")\Z"
            extract_folder = re.sub(suffix_pattern, "", file)
            assert extract_folder != file, f"no supported archive suffix: {file}"

            if not os.path.isdir(extract_folder):
                wget_file(url, data_dir, verbose)
                os.makedirs(extract_folder, exist_ok=True)
                extract_file(file, extract_folder, get_build_extract_command_fun(file))
                if remove_zipped:
                    os.remove(file)
            return extract_folder
        elif not os.path.isfile(file):
            wget_file(url, data_dir, verbose)
    except FileNotFoundError:
        if do_raise:
            # Bare raise keeps the original traceback intact.
            raise
    return None


def get_build_extract_command_fun(file: str):
    """Select a command builder for unpacking ``file`` based on its suffix.

    The returned callable takes ``(dirr, file)`` -- target directory first,
    archive path second -- and returns the extraction command as one string.
    Both paths are shell-quoted, so paths with spaces or shell metacharacters
    stay single arguments; plain paths appear in the command unchanged.

    Supported suffixes: ``.zip``/``.ZIP`` (``unzip``), ``.tar.gz``/``.tgz``
    (``tar xzf``), ``.tar``/``.TAR`` (``tar xf``) and ``.gz``/``.GZ``
    (``gzip``).

    Raises:
        NotImplementedError: If ``file`` has none of the supported suffixes.
    """
    import shlex  # local import: shlex is not among the module-level imports

    quote = shlex.quote

    if file.endswith((".zip", ".ZIP")):

        def fun(dirr, file):
            return f"unzip -d {quote(dirr)} {quote(file)}"

    # ".tar.gz" must be tested before the plain ".gz" branch below.
    elif file.endswith((".tar.gz", ".tgz")):

        def fun(dirr, file):
            return f"tar xzf {quote(file)} -C {quote(dirr)}"

    elif file.endswith((".tar", ".TAR")):

        def fun(dirr, file):
            return f"tar xf {quote(file)} -C {quote(dirr)}"

    elif file.endswith((".gz", ".GZ")):

        def fun(dirr, file):
            # gzip cannot unpack "into a directory": passing the directory as
            # a second operand only makes gzip complain about it. Stream the
            # decompressed data into <dirr>/<name without .gz> instead. The
            # redirection needs a shell, so it is wrapped in an explicit
            # ``sh -c`` and stays one self-contained command.
            name = os.path.basename(file)
            if name.endswith((".gz", ".GZ")):
                name = name[: -len(".gz")]
            target = f"{dirr}/{name}"
            return "sh -c " + quote(f"gzip -dc {quote(file)} > {quote(target)}")

    else:
        raise NotImplementedError
    return fun


def extract_file(file, extract_folder, build_extract_command_fun: Callable):
    """Build the extraction command for ``file`` and run it.

    ``build_extract_command_fun`` is called as ``(extract_folder, file)`` and
    must return the command that is handed to ``exec_command``.

    Raises:
        AssertionError: If the command produced any stderr output.
    """
    cmd = build_extract_command_fun(extract_folder, file)
    _, stderr = exec_command(cmd)
    if len(stderr) != 0:
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; exception type and message are the
        # same as before.
        raise AssertionError(f"{cmd=}: {stderr=}")


def wget_file(
    url: str,
    data_folder: str,
    verbose=False,
    file_name: str | None = None,
    user: str | None = None,
    password: str | None = None,
):
    """Download ``url`` into ``data_folder`` by running ``wget``.

    Args:
        url: Address of the file to download.
        data_folder: Target directory; created if it does not exist.
        verbose: When false, ``wget`` is run with ``-q``.
        file_name: Local file name; defaults to the last ``/``-separated
            segment of ``url``.
        user: Optional value for ``wget --user``.
        password: Optional value for ``wget --password``.

    Raises:
        FileNotFoundError: If ``wget`` exits with a non-zero status, or if
            the ``wget`` executable itself cannot be found.
    """
    import shlex  # local imports: neither module is imported at file level
    import subprocess

    # TODO(tilo): wget.download cannot continue ??
    if file_name is None:
        file_name = url.split("/")[-1]
    file = f"{data_folder}/{file_name}"
    os.makedirs(data_folder, exist_ok=True)

    # Arguments are passed as a list, so no shell is involved and paths,
    # user names and passwords need no quoting.
    args = ["wget", "-O", file, "-c"]
    if os.path.isfile(file):
        args.append("-N")
    if not verbose:
        args.append("-q")
    if password is not None:
        args += ["--password", password]
    if user is not None:
        args += ["--user", user]
    args += ["-P", data_folder, url]

    # Keep the password out of the printed command line.
    shown = ["***" if password is not None and arg == password else arg for arg in args]
    cmd = shlex.join(shown)
    print(f"{cmd=}")

    completed = subprocess.run(args, check=False)
    if completed.returncode != 0:
        raise FileNotFoundError(f"could not download {url}")


def main():
    """Demo: fetch ``test-other.tar.gz`` from openslr.org and unpack it under /tmp/test_data."""
    # No leading slash: download_data already inserts the "/" separator, so a
    # leading slash would yield "//" in both the URL and the local path.
    file_name = "test-other.tar.gz"
    base_url = "http://www.openslr.org/resources/12"
    download_data(base_url, file_name, "/tmp/test_data", unzip_it=True, verbose=True)


if __name__ == "__main__":
    main()
