from polly import omixatlas
from polly.auth import Polly
import os
import csv
from polly.errors import (
    paramException,
    UnauthorizedException,
    InvalidParameterException,
    # RequestException,
)
import json
import pytest
import requests
import pandas as pd
from polly import constants as const
from polly.constants import BASE_TEST_FORMAT_CONSTANTS_URL
from polly.errors import error_handler

# import polly.omixatlas_hlpr as omix_hlpr
from polly.validation import Validation

# from botocore.exceptions import ClientError


# Names of the environment variables that hold the refresh tokens used by
# the tests below: one for the default environment, one for "testpolly" and
# one for "devpolly".
key = "POLLY_REFRESH_TOKEN"
test_key = "TEST_POLLY_REFRESH_TOKEN"
dev_key = "DEV_POLLY_REFRESH_TOKEN"

# Each token is None when its variable is not set.
token = os.environ.get(key)
testpolly_token = os.environ.get(test_key)
devpolly_token = os.environ.get(dev_key)


def test_obj_initialised():
    """After authenticating, clients and a session can be created."""
    Polly.auth(token)

    # client built from the authenticated session
    session_client = omixatlas.OmixAtlas()
    assert session_client is not None

    # client built from an explicit token
    token_client = omixatlas.OmixAtlas(token)
    assert token_client is not None

    session = Polly.get_session(token)
    assert session is not None


def test_get_all_omixatlas():
    """get_all_omixatlas returns a payload whose "data" entry is not None."""
    Polly.auth(token)
    session_client = omixatlas.OmixAtlas()
    token_client = omixatlas.OmixAtlas(token)

    # the explicit-token client is exercised first, then the session one
    for client in (token_client, session_client):
        listing = client.get_all_omixatlas()
        assert listing["data"] is not None


def test_get_omixatlas():
    """_get_omixatlas("geo") returns a payload whose "data" entry is not None."""
    Polly.auth(token)
    session_client = omixatlas.OmixAtlas()
    token_client = omixatlas.OmixAtlas(token)
    repo_key = "geo"

    for client in (token_client, session_client):
        details = client._get_omixatlas(repo_key)
        assert details["data"] is not None


def test_omixatlas_summary():
    """omixatlas_summary returns a payload whose "data" entry is not None."""
    Polly.auth(token)
    session_client = omixatlas.OmixAtlas()
    token_client = omixatlas.OmixAtlas(token)
    repo_key = "elucidata.liveromix_atlas"

    for client in (token_client, session_client):
        summary = client.omixatlas_summary(repo_key)
        assert summary["data"] is not None


def test_download_data():
    """download_data returns a payload whose "data" entry is not None."""
    Polly.auth(token)
    session_client = omixatlas.OmixAtlas()
    token_client = omixatlas.OmixAtlas(token)
    repo_name, dataset_id = "elucidata.liveromix_atlas", "CCLE_metabolomics_LIVER"

    for client in (token_client, session_client):
        download = client.download_data(repo_name, dataset_id)
        assert download["data"] is not None


def test_add_dataset_str_type_source_folder_path():
    """add_datasets raises paramException when source_folder_path is a str."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    # passed as a plain string on purpose; the expected error asks for a dict
    bad_source = "<ingestion file_path>"
    expected_error = r".* source_folder_path should be a dict with valid data and metadata path values .*"

    with pytest.raises(paramException, match=expected_error):
        client.add_datasets(ingestion_repo_id, bad_source)


def test_add_dataset_dict_type_source_folder_path_no_metadata_key():
    """add_datasets raises paramException when the dict has no `metadata` key."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    ingestion_repo_id = "1654268055800"
    # only the data path is supplied; the current directory stands in for it
    incomplete_source = {"data": os.getcwd()}
    expected_error = r".* does not have `metadata` path. Format the source_folder_path_dict .*"

    with pytest.raises(paramException, match=expected_error):
        client.add_datasets(ingestion_repo_id, incomplete_source)


def test_add_dataset_dict_type_source_folder_path_no_data_key():
    """add_datasets raises paramException when the dict has no `data` key."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    ingestion_repo_id = "1654268055800"
    # only the metadata path is supplied; the current directory stands in for it
    incomplete_source = {"metadata": os.getcwd()}
    expected_error = r".* does not have `data` path. Format the source_folder_path_dict like this .*"

    with pytest.raises(paramException, match=expected_error):
        client.add_datasets(ingestion_repo_id, incomplete_source)


def test_add_dataset_dict_type_source_folder_path_wrong_data_path():
    """add_datasets raises paramException when the `data` path is not found."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    # both folders are built relative to the current working directory
    source = {
        "data": f"{os.getcwd()}/data_val",
        "metadata": f"{os.getcwd()}/metadata_val",
    }
    expected_error = r".* `data` path passed is not found. Please pass the correct path .*"

    with pytest.raises(paramException, match=expected_error):
        client.add_datasets(ingestion_repo_id, source)


def test_add_dataset_missing_metadata_file():
    """add_datasets raises paramException about data files lacking metadata.

    Fixture: two `.gct` files are downloaded into `data_file/` and a single
    `.jpco` file into `metadata_file/`, both under the current directory.
    """
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    remote_root = BASE_TEST_FORMAT_CONSTANTS_URL

    # local fixture folders, created on demand
    working_dir = os.getcwd()
    data_path = os.path.join(working_dir, "data_file")
    metadata_path = os.path.join(working_dir, "metadata_file")
    for folder in (data_path, metadata_path):
        os.makedirs(folder, exist_ok=True)

    data_names = (
        "tcga_LIHC_Copy_Number_Segment_TCGA-FV-A3R2-01A.gct",
        "tcga_LIHC_Copy_Number_Segment_TCGA-FV-A3R3-01A.gct",
    )

    # every data file is fetched (and checked) before any is written to disk
    data_replies = []
    for name in data_names:
        reply = requests.get(f"{remote_root}/data_file/{name}")
        error_handler(reply)
        data_replies.append(reply)

    for name, reply in zip(data_names, data_replies):
        with open(os.path.join(data_path, name), "w") as handle:
            handle.write(reply.text)

    metadata_name = "tcga_LIHC_Copy_Number_Segment_TCGA-FV-A3R2-01A.jpco"
    metadata_reply = requests.get(f"{remote_root}/metadata_file/{metadata_name}")
    error_handler(metadata_reply)

    with open(os.path.join(metadata_path, metadata_name), "w") as handle:
        handle.write(metadata_reply.text)

    source = {"data": data_path, "metadata": metadata_path}
    destination = "transcriptomics_test"
    expected_error = r".* No metadata for these data files .*"

    with pytest.raises(paramException, match=expected_error):
        client.add_datasets(ingestion_repo_id, source, destination)


def _fetch_to_file(url, destination):
    """Download `url` and store the undecoded response body at `destination`.

    The body is written as bytes: several fixtures are archives (.zip,
    .tar.gz, .bz, .bgz), and round-tripping them through `response.text`
    and a text-mode file would corrupt their content.
    """
    response = requests.get(url)
    error_handler(response)
    with open(destination, "wb") as handle:
        handle.write(response.content)


def test_add_dataset_all_edge_cases_of_ext_and_names():
    """add_datasets accepts tricky file names and multi-part extensions.

    Covers names containing `.`s, multi-word extensions (tar.gz, gct.bz,
    vcf.bgz) and zip archives. Five data files and their five metadata
    files are downloaded into folders under the current directory, then
    ingested; the returned DataFrame is expected to have 6 rows.
    """
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()
    # ingestion_test_1
    repo_id = "1654268055800"
    remote_data_folder = f"{BASE_TEST_FORMAT_CONSTANTS_URL}/dataset_ext_name_checks"
    remote_metadata_folder = f"{BASE_TEST_FORMAT_CONSTANTS_URL}/metadata_name_ext_checks"

    # local fixture folders, created on demand
    parent_dir = os.getcwd()
    data_path = os.path.join(parent_dir, "data_file_ext_checks")
    metadata_path = os.path.join(parent_dir, "metadata_file_ext_checks")
    os.makedirs(data_path, exist_ok=True)
    os.makedirs(metadata_path, exist_ok=True)

    data_file_names = (
        "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.zip",
        "a.b.tar.gz",
        "abc.gct.bz",
        "def.vcf.bgz",
        "tcga_LIHC_Copy_Number_Segment_TCGA-G3-A25Z-01A.b.gct",
    )
    metadata_file_names = (
        "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.json",
        "a.b.json",
        "abc.json",
        "def.json",
        "tcga_LIHC_Copy_Number_Segment_TCGA-G3-A25Z-01A.b.json",
    )

    # creating files in data path
    for file_name in data_file_names:
        _fetch_to_file(
            f"{remote_data_folder}/{file_name}",
            os.path.join(data_path, file_name),
        )

    # creating files in metadata path
    for file_name in metadata_file_names:
        _fetch_to_file(
            f"{remote_metadata_folder}/{file_name}",
            os.path.join(metadata_path, file_name),
        )

    source_folder_path = {"data": data_path, "metadata": metadata_path}
    destination_folder = "transcriptomics_test_ext_checks"

    res = omix_obj.add_datasets(repo_id, source_folder_path, destination_folder)

    assert isinstance(res, pd.DataFrame)

    # number of rows should be 6 in the df -> 5 data files listed above
    # And 1 combined metadata file
    assert len(res.index) == 6


def test_add_dataset_wrong_priority_value():
    """add_datasets raises paramException for a priority outside the allowed set."""
    # NOTE(review): this authenticates with the default-environment token,
    # while the sibling add_datasets tests use testpolly - confirm intended.
    Polly.auth(token)
    client = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    source = {
        "data": f"{os.getcwd()}/data_file",
        "metadata": f"{os.getcwd()}/metadata_file",
    }
    destination = "transcriptomics_70"
    bad_priority = "super_high"
    expected_error = r".*`priority` should be a string. Only 3 values are allowed i.e. `low`, `medium`, `high`.*"

    with pytest.raises(paramException, match=expected_error):
        client.add_datasets(ingestion_repo_id, source, destination, bad_priority)


def test_add_dataset_incorrect_priority_value_format():
    """add_datasets raises paramException when priority is a list, not a str."""
    Polly.auth(token)
    client = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    source = {
        "data": f"{os.getcwd()}/data_file",
        "metadata": f"{os.getcwd()}/metadata_file",
    }
    destination = "transcriptomics_70"
    # wrapped in a list on purpose
    bad_priority = ["super_high"]
    expected_error = r"`priority` should be a string. Only 3 values are allowed i.e. `low`, `medium`, `high`"

    with pytest.raises(paramException, match=expected_error):
        client.add_datasets(ingestion_repo_id, source, destination, bad_priority)


def test_add_dataset_data_file_with_wrong_extension():
    """add_datasets raises paramException for a `.txt` file in the data folder."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"

    # local fixture folders, created on demand
    working_dir = os.getcwd()
    data_path = os.path.join(working_dir, "data_file_wrong_ext")
    metadata_path = os.path.join(working_dir, "metadata_file_wrong_ext")
    for folder in (data_path, metadata_path):
        os.makedirs(folder, exist_ok=True)

    # drop a file with the offending extension into the data folder
    offending_file = os.path.join(data_path, "incorrect_ext_data_file.txt")
    with open(offending_file, "w") as handle:
        handle.write("wrong extension data file")

    source = {"data": data_path, "metadata": metadata_path}
    destination = "transcriptomics_test"
    expected_error = r".* File format for file .* invalid.It can be =>.*"

    with pytest.raises(paramException, match=expected_error):
        client.add_datasets(ingestion_repo_id, source, destination)


def test_add_dataset_metadata_file_with_wrong_extension():
    """add_datasets raises paramException for a `.txt` file in the metadata folder."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"

    # local fixture folders, created on demand
    working_dir = os.getcwd()
    data_path = os.path.join(working_dir, "data_file_wrong_ext")
    metadata_path = os.path.join(working_dir, "metadata_file_wrong_ext")
    for folder in (data_path, metadata_path):
        os.makedirs(folder, exist_ok=True)

    # drop a file with the offending extension into the metadata folder
    offending_file = os.path.join(metadata_path, "incorrect_ext_data_file.txt")
    with open(offending_file, "w") as handle:
        handle.write("wrong extension data file")

    source = {"data": data_path, "metadata": metadata_path}
    destination = "transcriptomics_test"
    expected_error = r".* File format for file .* invalid.It can be =>.*"

    with pytest.raises(paramException, match=expected_error):
        client.add_datasets(ingestion_repo_id, source, destination)


def test_delete_dataset_wrong_format_of_dataset_id():
    """delete_datasets raises paramException when dataset_ids is a bare str."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    # a single string, where the expected error asks for a list of strings
    bare_dataset_id = "tcga_LIHC_Copy_Number_Segment_TCGA-FV-A3R3-01A"
    expected_error = r".* dataset_ids should be list of strings.*"

    with pytest.raises(paramException, match=expected_error):
        client.delete_datasets(ingestion_repo_id, bare_dataset_id)


def test_devpolly_login():
    """Clients can be created against the devpolly environment."""
    Polly.auth(devpolly_token, env="devpolly")

    session_client = omixatlas.OmixAtlas()
    assert session_client is not None

    token_client = omixatlas.OmixAtlas(devpolly_token, env="devpolly")
    assert token_client is not None


def test_delete_unauthorized_error_wrong_token():
    """Creating a client after authenticating with a junk token is rejected."""
    junk_token = "abcdefgh"
    Polly.auth(junk_token, env="testpolly")
    expected_error = r"Expired or Invalid Token"

    with pytest.raises(UnauthorizedException, match=expected_error):
        omixatlas.OmixAtlas()


def test_delete_empty_repo_id():
    """delete_datasets raises paramException when repo_id is an empty string."""
    Polly.auth(devpolly_token, env="devpolly")
    client = omixatlas.OmixAtlas()
    empty_repo_id = ""
    dataset_ids = ["GSE100009_GPL11154"]
    expected_error = r".* repo_id should be str or int.*"

    with pytest.raises(paramException, match=expected_error):
        client.delete_datasets(empty_repo_id, dataset_ids)


def test_query_metadata():
    """Run each query listed in tests/query.csv through query_metadata.

    The CSV has a header row followed by `name,query` rows. Every result
    must be convertible with `dict(...)`.
    """
    Polly.auth(token)
    session_client = omixatlas.OmixAtlas()
    token_client = omixatlas.OmixAtlas(token)

    with open("tests/query.csv") as csv_file:
        rows = csv.reader(csv_file, delimiter=",")
        next(rows, None)  # the first row is a header
        queries = {row[0]: row[1] for row in rows}

    # look every query up first, so a missing entry fails before any call
    dataset_level = queries["query_dataset_level"]
    sample_level = queries["query_sample_level"]
    feature_level = queries["query_feature_level"]
    singlecell_sample = queries["query_singlecell_sample"]
    singlecell_feature = queries["query_singlecell_feature"]

    execution_order = (
        feature_level,
        sample_level,
        dataset_level,
        singlecell_sample,
        singlecell_feature,
    )
    for client in (token_client, session_client):
        for query in execution_order:
            assert dict(client.query_metadata(query)) is not None


def test_generate_report():
    """generate_report raises InvalidParameterException for each mistyped argument."""
    client = omixatlas.OmixAtlas(token)
    expected_error = r".* Invalid Parameters .*"

    # (repo_key, dataset_id, workspace_id): exactly one has the wrong type
    bad_argument_sets = (
        (9, "9", 9),  # repo_key given as int
        ("9", 9, 9),  # dataset_id given as int
        ("9", "9", "9"),  # workspace_id given as str
    )
    for repo_key, dataset_id, workspace_id in bad_argument_sets:
        with pytest.raises(InvalidParameterException, match=expected_error):
            client.generate_report(repo_key, dataset_id, workspace_id)


'''def test_update_function_by_updating_description():
    # Either value will be updated to the current value
    # Or it will be the current value
    # If value is not equal to the current value means
    # update is not working

    description = "updated description ingestion_test_1 omixatlas"
    # ingestion_test_1
    repo_key = "1654268055800"
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()
    res = omix_obj.update(repo_key, description=description)
    description_value = res.iloc[0, 4]
    assert description_value == "updated description ingestion_test_1 omixatlas"'''


def test_create_oa_wrong_category_value():
    """create raises paramException for an unsupported `category` value."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    display_name = "omix_category_test_13"
    description = "Created Omixatlas to test Category"
    expected_error = r".* should be a string and its value must be one of .*"

    with pytest.raises(paramException, match=expected_error):
        client.create(display_name, description, category="test")


def test_create_oa_wrong_description_type():
    """create raises paramException when the description is a list."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    display_name = "omix_category_test_13"
    # wrapped in a list on purpose
    bad_description = ["Created Omixatlas to test description"]
    expected_error = r".* should be a string.*"

    with pytest.raises(paramException, match=expected_error):
        client.create(display_name, bad_description)


def test_create_oa_wrong_display_name_type():
    """create raises paramException when the display name is an int."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    bad_display_name = 1234
    description = "Created Omixatlas to test description"
    expected_error = r".* should be a string.*"

    with pytest.raises(paramException, match=expected_error):
        client.create(bad_display_name, description)


def test_create_oa_wrong_image_url_type():
    """create raises paramException when image_url is a list."""
    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    display_name = "omix_desc_test_12"
    description = "Created Omixatlas to test description"
    # wrapped in a list on purpose
    bad_image_url = ["/a/c/img.png"]
    expected_error = r".* should be a string.*"

    with pytest.raises(paramException, match=expected_error):
        client.create(display_name, description, image_url=bad_image_url)


def test_validate_schema_function_empty_repo_id():
    """validate_schema raises paramException for the `empty_repo_id` fixture."""
    # load data
    fixture_url = const.SCHEMA_VALIDATION.get("empty_repo_id")
    reply = requests.get(fixture_url)
    error_handler(reply)
    payload = json.loads(reply.text)

    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    # if repo_id is empty, it will not match payload identifier
    expected_error = r"Value of repo_id key in the schema payload dict is not valid repo_id.*"

    with pytest.raises(paramException, match=expected_error):
        client.validate_schema(payload)


def test_validate_schema_function_missing_repo_id():
    """validate_schema raises paramException for the `missing_repo_id` fixture."""
    # load data
    fixture_url = const.SCHEMA_VALIDATION.get("missing_repo_id")
    reply = requests.get(fixture_url)
    error_handler(reply)
    payload = json.loads(reply.text)

    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()
    expected_error = r"schema_dict not in correct format, repo_id key not present.*"

    with pytest.raises(paramException, match=expected_error):
        client.validate_schema(payload)


def test_validate_schema_function_missing_schema_key():
    """validate_schema raises paramException for the `missing_schema_key` fixture."""
    # load data
    response = requests.get(const.SCHEMA_VALIDATION.get("missing_schema_key"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()
    # The pattern previously ended in `present*`, which only made the final
    # `t` optional/repeatable; `.*` is the intended trailing wildcard, as in
    # the sibling repo_id test.
    with pytest.raises(
        paramException,
        match=r"schema_dict not in correct format, schema key not present.*",
    ):
        omix_obj.validate_schema(test_data)


"""def test_validate_schema_function_wrong_repo_id():
    # load data
    response = requests.get(const.SCHEMA_VALIDATION.get("wrong_repo_id"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()
    with pytest.raises(
        RequestException,
        match=r".*No repository for identifier.*",
    ):
        omix_obj.validate_schema(test_data)"""


def test_validate_schema_function_field_name_capital():
    """`field_name_cap` fixture: the third result row reports the lowercase rule."""
    # load data
    response = requests.get(const.SCHEMA_VALIDATION.get("field_name_cap"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[2]["message"]
    assert "Lowercase only, Start with alphabets" in msg_val


def test_validate_schema_function_field_name_having_underscore():
    """`field_name_underscore` fixture: the first row reports the special-character rule."""
    response = requests.get(const.SCHEMA_VALIDATION.get("field_name_underscore"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert "Cannot include special characters except `_`" in msg_val


def test_validate_schema_function_field_name_having_resv_keywords():
    """`field_name_resv_keyword` fixture: the third row reports the reserved-keyword rule."""
    response = requests.get(const.SCHEMA_VALIDATION.get("field_name_resv_keyword"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[2]["message"]
    assert "Cannot be SQL reserved DDL and DML keywords" in msg_val


def test_validate_schema_function_field_name_having_original_name_empty():
    """`original_name_empty` fixture: the first row reports the minimum-length rule."""
    response = requests.get(const.SCHEMA_VALIDATION.get("original_name_empty"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert "ensure this value has at least 1 characters" in msg_val


def test_validate_schema_function_field_name_having_original_name_greater_than_50_chars():
    """`original_name_grtr_50` fixture: the first row reports the 50-character limit."""
    response = requests.get(const.SCHEMA_VALIDATION.get("original_name_grtr_50"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert "ensure this value has at most 50 characters" in msg_val


def test_validate_schema_function_field_name_having_type_is_not_supported():
    """`type_cosco` fixture: the first row lists the permitted `type` values."""
    response = requests.get(const.SCHEMA_VALIDATION.get("type_cosco"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert (
        "value is not a valid enumeration member; permitted: 'boolean', 'integer', 'float', 'text', 'object'"
        in msg_val
    )


def test_validate_schema_function_field_name_having_is_array_string():
    """`is_arr_str` fixture: the first row reports a boolean parsing failure."""
    response = requests.get(const.SCHEMA_VALIDATION.get("is_arr_str"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert "value could not be parsed to a boolean" in msg_val


def test_validate_schema_function_field_name_having_is_keyword_string():
    """`is_keyword_str` fixture: the first row reports a boolean parsing failure."""
    response = requests.get(const.SCHEMA_VALIDATION.get("is_keyword_str"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert "value could not be parsed to a boolean" in msg_val


def test_validate_schema_function_field_name_having_filter_size_less():
    """`filter_size_less` fixture: the first row reports the lower bound of 1."""
    response = requests.get(const.SCHEMA_VALIDATION.get("filter_size_less"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert "ensure this value is greater than or equal to 1" in msg_val


def test_validate_schema_function_field_name_having_filter_size_greater():
    """`filter_size_greater` fixture: the first row reports the upper bound of 3000."""
    response = requests.get(const.SCHEMA_VALIDATION.get("filter_size_greater"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert "ensure this value is less than or equal to 3000" in msg_val


def test_validate_schema_function_field_name_having_display_name_empty():
    """`display_name_empty` fixture: the first row flags display_name as too short."""
    response = requests.get(const.SCHEMA_VALIDATION.get("display_name_empty"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    first_row = res_df.iloc[0]
    assert "display_name" in first_row["attribute"]
    assert "ensure this value has at least 1 characters" in first_row["message"]


def test_validate_schema_function_field_name_having_display_name_greater():
    """`display_name_grtr_50` fixture: the first row flags display_name as too long."""
    response = requests.get(const.SCHEMA_VALIDATION.get("display_name_grtr_50"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    first_row = res_df.iloc[0]
    assert "display_name" in first_row["attribute"]
    assert "ensure this value has at most 50 characters" in first_row["message"]


def test_validate_schema_function_field_name_having_is_filter_is_keyword():
    """`is_keywrd_is_filter` fixture: the first row reports the is_keyword/is_filter conflict."""
    response = requests.get(const.SCHEMA_VALIDATION.get("is_keywrd_is_filter"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert "is_keyword is False and is_filter is True" in msg_val


def test_validate_schema_function_field_name_having_is_filter_is_ontology():
    """`is_keywrd_is_ontology` fixture: the first row reports the is_filter/is_ontology conflict."""
    response = requests.get(const.SCHEMA_VALIDATION.get("is_keywrd_is_ontology"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    msg_val = res_df.iloc[0]["message"]
    assert "is_filter is False and is_ontology is True" in msg_val


def test_validate_schema_function_positive_case():
    """`positive_case` fixture: validate_schema returns a result with no rows."""
    fixture_url = const.SCHEMA_VALIDATION.get("positive_case")
    reply = requests.get(fixture_url)
    error_handler(reply)
    payload = json.loads(reply.text)

    Polly.auth(testpolly_token, env="testpolly")
    client = omixatlas.OmixAtlas()

    report = client.validate_schema(payload)

    # an empty index means nothing was reported
    assert len(report.index) == 0


def test_validate_schema_function_having_original_name_int():
    """`original_name_int` fixture: the first row flags original_name as not a str."""
    response = requests.get(const.SCHEMA_VALIDATION.get("original_name_int"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)

    # check the type before indexing, so a wrong return type fails here
    assert isinstance(res_df, pd.DataFrame)
    first_row = res_df.iloc[0]
    assert first_row["attribute"] == "original_name"
    assert "str type expected" in first_row["message"]


def test_validate_schema_function_having_field_size_str():
    """
    validate_schema on the `filter_size_str` schema fixture
    shall flag the `filter_size` attribute with a
    "value is not a valid integer" message in the first row
    """
    response = requests.get(const.SCHEMA_VALIDATION.get("filter_size_str"))
    error_handler(response)
    test_data = json.loads(response.text)
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()

    res_df = omix_obj.validate_schema(test_data)
    # check the type and size before indexing, otherwise a wrong
    # return value surfaces as an unrelated error from `.iloc`
    assert isinstance(res_df, pd.DataFrame)
    assert len(res_df.index) > 0, "expected at least one validation error"
    first_error = res_df.iloc[0]
    assert first_error["attribute"] == "filter_size"
    assert "value is not a valid integer" in first_error["message"]


def test_download_metadata():
    """
    download_metadata shall return None and write
    `<dataset_id>.json` into the folder passed to it
    """
    Polly.auth(token)
    omix_obj = omixatlas.OmixAtlas()
    repo_key = "geo"
    dataset_id = "GSE10001_GPL6246"
    download_dir = os.getcwd()
    file_path = os.path.join(download_dir, f"{dataset_id}.json")
    try:
        assert omix_obj.download_metadata(repo_key, dataset_id, download_dir) is None
        assert os.path.exists(file_path)
    finally:
        # remove the downloaded file even when an assertion above fails,
        # so that a failed run does not leave it in the working directory
        if os.path.exists(file_path):
            os.remove(file_path)


def test_link_report():
    """
    link_report shall raise InvalidParameterException when any single
    argument is replaced by its invalid counterpart
    """
    # positional order: repo_key, dataset_id, workspace_id,
    # workspace_path, access_key
    valid_args = ("9", "9", 9, "path_to_workspaces", "private")
    invalid_args = (9, 9, "9", 9, 9)
    obj = omixatlas.OmixAtlas(token)

    for position, bad_value in enumerate(invalid_args):
        # keep every argument valid except the one at `position`
        call_args = list(valid_args)
        call_args[position] = bad_value
        with pytest.raises(
            InvalidParameterException,
            match=r".* Invalid Parameters .*",
        ):
            obj.link_report(*call_args)


def test_fetch_linked_reports():
    """
    fetch_linked_reports shall raise InvalidParameterException when
    either the repo key or the dataset id is the invalid value
    """
    good_repo_key, bad_repo_key = "9", 9
    good_dataset_id, bad_dataset_id = "9", 9
    obj = omixatlas.OmixAtlas(token)

    failing_calls = (
        (bad_repo_key, good_dataset_id),
        (good_repo_key, bad_dataset_id),
    )
    for call_args in failing_calls:
        with pytest.raises(
            InvalidParameterException,
            match=r".* Invalid Parameters .*",
        ):
            obj.fetch_linked_reports(*call_args)


def test_delete_linked_report():
    """
    delete_linked_report shall raise InvalidParameterException when any
    single argument is replaced by its invalid counterpart
    """
    # positional order: repo_key, dataset_id, report_id
    valid_args = ("9", "9", "report_id")
    invalid_args = (9, 9, 9)
    obj = omixatlas.OmixAtlas(token)

    for position, bad_value in enumerate(invalid_args):
        # keep every argument valid except the one at `position`
        call_args = list(valid_args)
        call_args[position] = bad_value
        with pytest.raises(
            InvalidParameterException,
            match=r".* Invalid Parameters .*",
        ):
            obj.delete_linked_report(*call_args)


# update dataset test cases
def test_update_dataset_str_type_source_folder_path():
    """
    source_folder_path passed as a str instead of a dict
    shall raise paramException
    """
    Polly.auth(testpolly_token, env="testpolly")
    atlas = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    # a plain string, whereas the expected error asks for a dict
    str_source_path = "<ingestion file_path>"
    expected_error = (
        r".* source_folder_path should be a dict with valid data and metadata path values .*"
    )
    with pytest.raises(paramException, match=expected_error):
        atlas.update_datasets(ingestion_repo_id, str_source_path)


def test_update_dataset_dict_type_source_folder_path_wrong_data_path():
    """
    a data path that cannot be found shall raise paramException
    """
    Polly.auth(testpolly_token, env="testpolly")
    atlas = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    # the test does not create this folder in the working directory
    folder_paths = {"data": f"{os.getcwd()}/data_val"}
    expected_error = (
        r".* path passed is not found. Please pass the correct path and call the function again.*"
    )
    with pytest.raises(paramException, match=expected_error):
        atlas.update_datasets(ingestion_repo_id, folder_paths)


# Disabled: this test is wrong and is no longer applicable.
# def test_update_dataset_data_metadata():
#     """
#     tests:
#     if data and metadata are correctly provided
#     if not already present in oa -> uploaded.
#     if already present in oa -> updated
#     """
#     Polly.auth(testpolly_token, env="testpolly")
#     omix_obj = omixatlas.OmixAtlas()
#     repo_id = "1654268055800"

#     base_add_datatest_test_file_path = BASE_TEST_FORMAT_CONSTANTS_URL
#     base_add_metadata_test_file_path = BASE_TEST_FORMAT_CONSTANTS_URL
#     priority = "high"
#     # creating directory
#     parent_dir = os.getcwd()
#     data_dir = "dataset_ext_name_checks"
#     metadata_dir = "metadata_name_ext_checks"
#     data_path = os.path.join(parent_dir, data_dir)
#     metadata_path = os.path.join(parent_dir, metadata_dir)

#     # data directory
#     if not os.path.isdir(data_path):
#         os.makedirs(data_path)
#     # metadata directory
#     if not os.path.isdir(metadata_path):
#         os.makedirs(metadata_path)

#     # data file 1
#     dataset_files_folder_path = f"{base_add_datatest_test_file_path}/{data_dir}"
#     data_file_1 = (
#         f"{dataset_files_folder_path}"
#         + "/DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.zip"
#     )
#     response_1 = requests.get(data_file_1)
#     error_handler(response_1)
#     file_1_name = "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.zip"
#     with open(os.path.join(data_path, file_1_name), "w") as file_1:
#         file_1_content = response_1.text
#         file_1.write(file_1_content)

#     metadata_file_folder_path = (
#         f"{base_add_metadata_test_file_path}/metadata_name_ext_checks"
#     )

#     metadata_file_1 = (
#         f"{metadata_file_folder_path}/"
#         + "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.json"
#     )
#     metadata_resp_1 = requests.get(metadata_file_1)
#     error_handler(metadata_resp_1)

#     meta_file_1_nam = (
# "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.json"
#     )

#     # creating files in metadata path
#     with open(os.path.join(metadata_path, meta_file_1_nam), "w") as m_file_1:
#         metadata_1_content = metadata_resp_1.text
#         m_file_1.write(metadata_1_content)

#     source_folder_path = {"data": data_path, "metadata": metadata_path}
#     destination_folder = "transcriptomics_75"

#     res = omix_obj.update_datasets(
#         repo_id, source_folder_path, destination_folder, priority
#     )

#     assert isinstance(res, pd.DataFrame)

#     # number of rows should be 6 in the df -> 5 data files listed above
#     # And 1 combined metadata file
#     num_of_rows = len(res.index)
#     assert num_of_rows == 2


def test_update_dataset_update_metadata_with_no_dataset_in_oa():
    """
    updating a metadata with no data in the oa
    shall throw a warning
    """
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()
    # ingestion_test_1
    repo_id = "1654268055800"
    base_add_metadata_test_file_path = BASE_TEST_FORMAT_CONSTANTS_URL

    # creating the metadata directory inside the working directory
    metadata_path = os.path.join(os.getcwd(), "metadata_name_new")
    os.makedirs(metadata_path, exist_ok=True)

    metadata_file_folder_path = (
        f"{base_add_metadata_test_file_path}/metadata_name_ext_checks"
    )
    # metadata file such that corresponding data file is not present in OA
    metadata_file_1 = (
        f"{metadata_file_folder_path}/"
        + "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.json"
    )
    metadata_resp_1 = requests.get(metadata_file_1)
    error_handler(metadata_resp_1)

    # the downloaded content is saved under a different name (`corrN.json`)
    meta_file_1_nam = "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corrN.json"

    # creating files in metadata path
    with open(os.path.join(metadata_path, meta_file_1_nam), "w") as m_file_1:
        m_file_1.write(metadata_resp_1.text)
    source_folder_path = {"metadata": metadata_path}
    destination_folder = "transcriptomics_75"

    # pytest.warns itself fails the test when no warning is emitted,
    # so no manual check is needed inside the block.
    # NOTE(review): any Warning subclass satisfies this check - consider
    # narrowing it to the warning class update_datasets actually emits
    with pytest.warns(Warning) as record:
        omix_obj.update_datasets(repo_id, source_folder_path, destination_folder)
    assert len(record) >= 1


# Disabled: these files exist in S3 but have not been ingested in the infra,
# which is why they do not show up in the list files API.
# As a result a warning is raised, because the system reports that these files
# are already present in it, and so this test fails.
# TODO: needs to be discussed with Shilpa before re-enabling.
# def test_update_dataset_all_edge_cases_of_ext_and_names_with_one_missing_data():
#     """
#     tests:
#     1. support for the different file formats for data and metadata
#     2. warning shall be given if a metadata is being updated without any data file.
#     3. all datafiles with supported formats shall be updated if metadata files provided
#     """
#     # test_update_dataset_all_edge_cases_of_ext_and_names_with_one_missing_metadata
#     # names with `.`s, tar.gz, gct.bz, vcf.bgz
#     # multi word extensions
#     # zips formats
#     Polly.auth(testpolly_token, env="testpolly")
#     omix_obj = omixatlas.OmixAtlas()
#     repo_id = "1654268055800"
#     base_add_datatest_test_file_path = BASE_TEST_FORMAT_CONSTANTS_URL
#     base_add_metadata_test_file_path = BASE_TEST_FORMAT_CONSTANTS_URL

#     # creating directory
#     parent_dir = os.getcwd()
#     data_dir = "data_file_ext_checks"
#     metadata_dir = "metadata_file_ext_checks"
#     data_path = os.path.join(parent_dir, data_dir)
#     metadata_path = os.path.join(parent_dir, metadata_dir)

#     # data directory
#     if not os.path.isdir(data_path):
#         os.makedirs(data_path)
#     # metadata directory
#     if not os.path.isdir(metadata_path):
#         os.makedirs(metadata_path)

#     dataset_files_folder_path = (
#         f"{base_add_datatest_test_file_path}/dataset_ext_name_checks"
#     )
#     data_file_1 = (
#         f"{dataset_files_folder_path}"
#         + "/DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.zip"
#     )
#     data_file_2 = f"{dataset_files_folder_path}/a.b.tar.gz"
#     data_file_3 = f"{dataset_files_folder_path}/abc.gct.bz"
#     data_file_4 = f"{dataset_files_folder_path}/def.vcf.bgz"

#     response_1 = requests.get(data_file_1)
#     error_handler(response_1)
#     response_2 = requests.get(data_file_2)
#     error_handler(response_2)
#     response_3 = requests.get(data_file_3)
#     error_handler(response_3)
#     response_4 = requests.get(data_file_4)
#     error_handler(response_4)

#     file_1_name = "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.zip"
#     file_2_name = "a.b.tar.gz"
#     file_3_name = "abc.gct.bz"
#     file_4_name = "def.vcf.bgz"

#     # creating files in data path
#     with open(os.path.join(data_path, file_1_name), "w") as file_1:
#         file_1_content = response_1.text
#         file_1.write(file_1_content)

#     with open(os.path.join(data_path, file_2_name), "w") as file_2:
#         file_2_content = response_2.text
#         file_2.write(file_2_content)

#     with open(os.path.join(data_path, file_3_name), "w") as file_3:
#         file_3_content = response_3.text
#         file_3.write(file_3_content)

#     with open(os.path.join(data_path, file_4_name), "w") as file_4:
#         file_4_content = response_4.text
#         file_4.write(file_4_content)

#     metadata_file_folder_path = (
#         f"{base_add_metadata_test_file_path}/metadata_name_ext_checks"
#     )

#     metadata_file_1 = (
#         f"{metadata_file_folder_path}/"
#         + "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.json"
#     )
#     metadata_file_2 = f"{metadata_file_folder_path}/a.b.json"
#     metadata_file_3 = f"{metadata_file_folder_path}/abc.json"
#     metadata_file_4 = f"{metadata_file_folder_path}/def.json"
#     metadata_file_5 = f"{metadata_file_folder_path}/tcga_LIHC_Copy_Number_Segment_TCGA-G3-A25Z-01A.b.json"

#     metadata_resp_1 = requests.get(metadata_file_1)
#     error_handler(metadata_resp_1)

#     metadata_resp_2 = requests.get(metadata_file_2)
#     error_handler(metadata_resp_2)

#     metadata_resp_3 = requests.get(metadata_file_3)
#     error_handler(metadata_resp_3)

#     metadata_resp_4 = requests.get(metadata_file_4)
#     error_handler(metadata_resp_4)

#     metadata_resp_5 = requests.get(metadata_file_5)
#     error_handler(metadata_resp_5)

# meta_file_1_nam = (
#         "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.json"
# )
#     metadata_file_2_name = "a.b.json"
#     metadata_file_3_name = "abc.json"
#     metadata_file_4_name = "def.json"
#     metadata_file_5_name = "tcga_LIHC_Copy_Number_Segment_TCGA-G3-A25Z-01A.b.json"

#     # creating files in metadata path
#     with open(os.path.join(metadata_path, meta_file_1_nam), "w") as m_file_1:
#         metadata_1_content = metadata_resp_1.text
#         m_file_1.write(metadata_1_content)

#     with open(os.path.join(metadata_path, metadata_file_2_name), "w") as m_file_2:
#         metadata_2_content = metadata_resp_2.text
#         m_file_2.write(metadata_2_content)

#     with open(os.path.join(metadata_path, metadata_file_3_name), "w") as m_file_3:
#         metadata_3_content = metadata_resp_3.text
#         m_file_3.write(metadata_3_content)

#     with open(os.path.join(metadata_path, metadata_file_4_name), "w") as m_file_4:
#         metadata_4_content = metadata_resp_4.text
#         m_file_4.write(metadata_4_content)

#     with open(os.path.join(metadata_path, metadata_file_5_name), "w") as m_file_5:
#         metadata_5_content = metadata_resp_5.text
#         m_file_5.write(metadata_5_content)

#     source_folder_path = {"data": data_path, "metadata": metadata_path}
#     destination_folder = "transcriptomics_75"

#     # then updating the same datasets
#     res = omix_obj.update_datasets(repo_id, source_folder_path, destination_folder)
#     assert isinstance(res, pd.DataFrame)

#     # number of rows should be 6 in the df -> 5 data files listed above
#     # And 1 combined metadata file
#     num_of_rows = len(res.index)
#     assert num_of_rows == 6


def test_update_dataset_wrong_priority_value():
    """
    a priority string outside `low`, `medium`, `high`
    shall raise paramException
    """
    Polly.auth(token)
    atlas = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    folder_paths = {
        "data": f"{os.getcwd()}/data_file",
        "metadata": f"{os.getcwd()}/metadata_file",
    }
    target_folder = "transcriptomics_70"
    # a string, but not one of the values listed in the expected error
    bad_priority = "super_high"
    expected_error = (
        r".*`priority` should be a string. Only 3 values are allowed i.e. `low`, `medium`, `high`.*"
    )
    with pytest.raises(paramException, match=expected_error):
        atlas.update_datasets(
            ingestion_repo_id, folder_paths, target_folder, bad_priority
        )


def test_update_dataset_incorrect_priority_value_format():
    """
    testing args datatype/format:
    a priority passed as a list instead of a string
    shall raise paramException
    """
    Polly.auth(token)
    atlas = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"
    folder_paths = {
        "data": f"{os.getcwd()}/data_file",
        "metadata": f"{os.getcwd()}/metadata_file",
    }
    target_folder = "transcriptomics_70"
    # wrapped in a list, whereas the expected error asks for a string
    bad_priority = ["super_high"]
    expected_error = (
        r"`priority` should be a string. Only 3 values are allowed i.e. `low`, `medium`, `high`"
    )
    with pytest.raises(paramException, match=expected_error):
        atlas.update_datasets(
            ingestion_repo_id, folder_paths, target_folder, bad_priority
        )


def test_update_dataset_data_file_with_wrong_extension():
    """
    a `.txt` file inside the data folder
    shall make update_datasets raise paramException
    """
    Polly.auth(testpolly_token, env="testpolly")
    atlas = omixatlas.OmixAtlas()
    ingestion_repo_id = "1654268055800"

    # both folders live inside the current working directory
    working_dir = os.getcwd()
    data_folder = os.path.join(working_dir, "data_file_wrong_ext")
    metadata_folder = os.path.join(working_dir, "metadata_file_wrong_ext")
    for folder in (data_folder, metadata_folder):
        os.makedirs(folder, exist_ok=True)

    # data file carrying the `.txt` extension
    bad_data_file = os.path.join(data_folder, "incorrect_ext_data_file.txt")
    with open(bad_data_file, "w") as handle:
        handle.write("wrong extension data file")

    folder_paths = {"data": data_folder, "metadata": metadata_folder}
    target_folder = "transcriptomics_test"
    expected_error = r".* File format for file .* invalid.It can be =>.*"

    with pytest.raises(paramException, match=expected_error):
        atlas.update_datasets(ingestion_repo_id, folder_paths, target_folder)


def test_update_dataset_new_data_file_without_metadata_file():
    """
    updating data file which has not been uploaded to an OA
    shall throw a warning
    """
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()
    # ingestion_test_1
    repo_id = "1654268055800"
    base_add_datatest_test_file_path = BASE_TEST_FORMAT_CONSTANTS_URL

    # creating the data directory inside the working directory
    data_path = os.path.join(os.getcwd(), "update_dataset_test_files/data")
    os.makedirs(data_path, exist_ok=True)

    dataset_files_folder_path = (
        f"{base_add_datatest_test_file_path}/update_dataset_test_files/data/"
    )
    data_file_1 = (
        f"{dataset_files_folder_path}" + "PRAD_CPCG_2017_Mutation_TCGA-2A-A8VL-01.gct"
    )
    response_1 = requests.get(data_file_1)
    error_handler(response_1)

    # the downloaded content is saved under a different name (`_new` suffix)
    file_1_name = "PRAD_CPCG_2017_Mutation_TCGA-2A-A8VL-01_new.gct"

    # creating files in data path
    with open(os.path.join(data_path, file_1_name), "w") as file_1:
        file_1.write(response_1.text)

    source_folder_path = {"data": data_path}
    destination_folder = "transcriptomics_test"
    # pytest.warns itself fails the test when no warning is emitted,
    # so no manual check is needed inside the block.
    # NOTE(review): any Warning subclass satisfies this check - consider
    # narrowing it to the warning class update_datasets actually emits
    with pytest.warns(Warning):
        omix_obj.update_datasets(repo_id, source_folder_path, destination_folder)


def test_update_dataset_metadata_file_with_wrong_extension():
    """
    a `.txt` file inside the metadata folder
    shall make update_datasets raise paramException
    """
    Polly.auth(testpolly_token, env="testpolly")
    atlas = omixatlas.OmixAtlas()
    # ingestion_test_1
    ingestion_repo_id = "1654268055800"

    # both folders live inside the current working directory
    working_dir = os.getcwd()
    data_folder = os.path.join(working_dir, "data_file_wrong_ext")
    metadata_folder = os.path.join(working_dir, "metadata_file_wrong_ext")
    for folder in (data_folder, metadata_folder):
        os.makedirs(folder, exist_ok=True)

    # metadata file carrying the `.txt` extension
    bad_metadata_file = os.path.join(metadata_folder, "incorrect_ext_data_file.txt")
    with open(bad_metadata_file, "w") as handle:
        handle.write("wrong extension data file")

    folder_paths = {"data": data_folder, "metadata": metadata_folder}
    target_folder = "transcriptomics_test"
    expected_error = r".* File format for file .* invalid.It can be =>.*"

    with pytest.raises(paramException, match=expected_error):
        atlas.update_datasets(ingestion_repo_id, folder_paths, target_folder)


def test_validation_addition_datasets_full_flow():
    """
    downloads four dataset files and four metadata files, runs
    validate_datasets on them and then add_datasets with validation=True;
    the dataframe returned by add_datasets shall have 3 rows
    """
    Polly.auth(testpolly_token, env="testpolly")
    omix_obj = omixatlas.OmixAtlas()
    validation_obj = Validation()
    # ingestion_test_1
    repo_id = "1654268055800"
    files_base_url = const.VALIDATION_FLOW_FILES_URL

    # local folders inside the current working directory
    working_dir = os.getcwd()
    data_path = os.path.join(working_dir, "dataset")
    metadata_path = os.path.join(working_dir, "metadata")
    for folder in (data_path, metadata_path):
        os.makedirs(folder, exist_ok=True)

    # (remote file name, local file name) pairs for the data files
    # NOTE(review): the local names do not line up with the remote ones
    # (e.g. AdCC10T content is saved as AdCC1T.gct) - confirm whether
    # this is intentional before changing it
    data_files = (
        (
            "ACBC_MSKCC_2015_Copy_Number_AdCC10T.gct",
            "ACBC_MSKCC_2015_Copy_Number_AdCC1T.gct",
        ),
        (
            "ACBC_MSKCC_2015_Copy_Number_AdCC11T.gct",
            "ACBC_MSKCC_2015_Copy_Number_AdCC10T.gct",
        ),
        (
            "ACBC_MSKCC_2015_Copy_Number_AdCC12T.gct",
            "ACBC_MSKCC_2015_Copy_Number_AdCC12T.gct",
        ),
        (
            "ACBC_MSKCC_2015_Copy_Number_AdCC1T.gct",
            "ACBC_MSKCC_2015_Copy_Number_AdCC11T.gct",
        ),
    )

    # download every data file first, then write them all to disk
    data_responses = []
    for remote_name, _ in data_files:
        data_resp = requests.get(f"{files_base_url}/dataset/{remote_name}")
        error_handler(data_resp)
        data_responses.append(data_resp)

    for (_, local_name), data_resp in zip(data_files, data_responses):
        with open(os.path.join(data_path, local_name), "w") as data_fh:
            data_fh.write(data_resp.text)

    # metadata files keep the same name remotely and locally
    metadata_files = (
        "ACBC_MSKCC_2015_Copy_Number_AdCC10T.json",
        "ACBC_MSKCC_2015_Copy_Number_AdCC11T.json",
        "ACBC_MSKCC_2015_Copy_Number_AdCC12T.json",
        "ACBC_MSKCC_2015_Copy_Number_AdCC1T.json",
    )

    # download every metadata file first, then write them all to disk
    metadata_responses = []
    for metadata_name in metadata_files:
        metadata_resp = requests.get(f"{files_base_url}/metadata/{metadata_name}")
        error_handler(metadata_resp)
        metadata_responses.append(metadata_resp)

    for metadata_name, metadata_resp in zip(metadata_files, metadata_responses):
        with open(os.path.join(metadata_path, metadata_name), "w") as metadata_fh:
            metadata_fh.write(metadata_resp.text)

    source_folder_path = {"data": data_path, "metadata": metadata_path}
    destination_folder = "transcriptomics_test"

    validation_result = validation_obj.validate_datasets(repo_id, source_folder_path)
    err_dataset_df, status_dict = validation_result

    assert isinstance(err_dataset_df, pd.DataFrame)
    assert isinstance(status_dict, dict)

    res_df = omix_obj.add_datasets(
        repo_id, source_folder_path, destination_folder, validation=True
    )
    assert isinstance(res_df, pd.DataFrame)

    # expected rows in `res_df`:
    # 2 files that get uploaded (the other two fail validation)
    # + 1 `combined_metadata` file
    assert res_df.shape[0] == 3


# DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.zip
# Disabled: this file exists in S3 but is not ingested in the infra, so a warning is raised and the test fails.
# def test_update_dataset_correct_combined_metadata_json():
#     # test generation of combined_metadata json and content
#     # json should have ingestion and dataset details.

#     Polly.auth(testpolly_token, env="testpolly")
#     omix_obj = omixatlas.OmixAtlas()
#     repo_id = "1654268055800"
#     base_add_datatest_test_file_path = BASE_TEST_FORMAT_CONSTANTS_URL
#     base_add_metadata_test_file_path = BASE_TEST_FORMAT_CONSTANTS_URL
#     priority = "high"
#     # creating directory
#     parent_dir = os.getcwd()
#     data_dir = "dataset_ext_name_checks"
#     metadata_dir = "metadata_name_ext_checks"
#     data_path = os.path.join(parent_dir, data_dir)
#     metadata_path = os.path.join(parent_dir, metadata_dir)

#     # data directory
#     if not os.path.isdir(data_path):
#         os.makedirs(data_path)
#     # metadata directory
#     if not os.path.isdir(metadata_path):
#         os.makedirs(metadata_path)
#     data_file_1 = "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.zip"
#     # data file 1
#     dataset_files_folder_path = f"{base_add_datatest_test_file_path}/{data_dir}"
#     data_file_1 = (
#         f"{dataset_files_folder_path}"
#         + "/DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.zip"
#     )
#     response_1 = requests.get(data_file_1)
#     error_handler(response_1)
#     file_1_name = "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.zip"
#     with open(os.path.join(data_path, file_1_name), "w") as file_1:
#         file_1_content = response_1.text
#         file_1.write(file_1_content)

#     metadata_file_folder_path = (
#         f"{base_add_metadata_test_file_path}/metadata_name_ext_checks"
#     )

#     metadata_file_1 = (
#         f"{metadata_file_folder_path}/"
#         + "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.json"
#     )
#     metadata_resp_1 = requests.get(metadata_file_1)
#     error_handler(metadata_resp_1)

# meta_file_1_nam = (
#     "DLS_BB057_CV244_AML_BMMC_ATAC088_ATAC089_GEX062_GEX063_MTDNA073_MTDNA074_0.05rnaclustres_0.05corr.json"
# )

#     # creating files in metadata path
#     with open(os.path.join(metadata_path, meta_file_1_nam), "w") as m_file_1:
#         metadata_1_content = metadata_resp_1.text
#         m_file_1.write(metadata_1_content)
#     source_folder_path = {"data": data_path, "metadata": metadata_path}
#     destination_folder = "transcriptomics_75"

#     # generating the data_metadata_mapping_dict
#     data_metadata_mapping_dict = {}
#     unmapped_file_names = []
#     (
#         data_metadata_mapping_dict,
#         unmapped_file_names,
#         unmapped_metadata_file_names,
#     ) = omix_obj._map_data_metadata_files_for_update(source_folder_path)
#     # constructing the metadata dict which then goes into the combined metadata.json

#     metadata_file_list = omix_hlpr.metadata_files_for_upload(metadata_path)
#     combined_metadata_dict = omix_obj._construct_metadata_dict_from_files(
#         repo_id,
#         metadata_file_list,
#         priority,
#         destination_folder,
#         data_metadata_mapping_dict,
#         metadata_path,
#         update=True,
#     )
#     assert combined_metadata_dict is not None
#     assert combined_metadata_dict["data"] is not None
#     types_in_metadata_dict = []
#     for item in combined_metadata_dict["data"]:
#         types_in_metadata_dict.append(item.get("type"))
#     assert "ingestion_metadata" in types_in_metadata_dict
#     assert "file_metadata" in types_in_metadata_dict


def test_get_metadata():
    """
    get_metadata shall return a dataframe for a repo name as well as a
    repo id, and raise paramException for an invalid table name,
    dataset id or repo key
    """
    Polly.auth(token)
    obj = omixatlas.OmixAtlas()
    repo_name, repo_id, bad_repo_key = "geo", 9, [9]
    dataset_id, bad_dataset_id = "GSE100053_GPL10558", 1234
    table_name, bad_table_name = "samples", "datasets"

    # the repo can be addressed by its name and by its id
    for repo_key in (repo_name, repo_id):
        assert isinstance(
            obj.get_metadata(repo_key, dataset_id, table_name),
            pd.DataFrame,
        )

    # (expected error pattern, arguments with exactly one invalid value)
    failing_calls = (
        (
            r".*Argument 'table_name' not valid, .*",
            (repo_name, dataset_id, bad_table_name),
        ),
        (
            r".*Argument 'dataset_id' is either empty or invalid. .*",
            (repo_id, bad_dataset_id, table_name),
        ),
        (
            r".*Argument 'repo_key' is either empty or invalid. .*",
            (bad_repo_key, dataset_id, table_name),
        ),
    )
    for expected_error, call_args in failing_calls:
        with pytest.raises(paramException, match=expected_error):
            obj.get_metadata(*call_args)
