# Data types, keyed by type name. Each entry lists its file formats and the
# repositories that support it, each with a "header_mapping" dict.
DATA_TYPES = {
    "Mutation": [
        {
            "format": ["maf"],
            "supported_repo": [
                {
                    "name": "cbioportal",
                    # presumably field name -> column header of the file;
                    # verify against the code that consumes this mapping
                    "header_mapping": dict(
                        gene="Hugo_Symbol",
                        chr="Chromosome",
                        startPosition="Start_Position",
                        endPosition="End_Position",
                        referenceAllele="Reference_Allele",
                        variantAllele="Tumor_Seq_Allele2",
                        mutationType="Variant_Classification",
                        variantType="Variant_Type",
                        uniqueSampleKey="Tumor_Sample_Barcode",
                    ),
                },
                # tcga is listed with an empty header mapping
                {
                    "name": "tcga",
                    "header_mapping": {},
                },
            ],
        },
    ],
}

# endpoints
CONSTANTS_ENDPOINT = "/constants"
REPOSITORIES_ENDPOINT = "/repositories"
# The literal "{}" is kept in the path (presumably filled in by callers via
# str.format — verify against usage).
REPOSITORY_PACKAGE_ENDPOINT = f"{REPOSITORIES_ENDPOINT}/{{}}/packages"
IMAGE_URL_ENDPOINT = (
    "https://elucidatainc.github.io/PublicAssets/"
    "discover-fe-assets/omixatlas_hex.svg"
)

# status codes
OK = 200
CREATED = 201
UPLOAD_URL_CREATED = 204
UPLOAD_ERROR_CODE = 400

# Not a status code. Presumably the name of an environment variable that
# identifies the compute environment — TODO confirm against callers.
COMPUTE_ENV_VARIABLE = "POLLY_TYPE"

# Multipart S3 upload parameters

# Size units, expressed in bytes (powers of 1024)
KB = 2**10
MB = KB**2
GB = KB**3

# File-size boundaries separating the small / medium / large chunk-size tiers
SMALL_FILE_SIZE = 20 * GB
MEDIUM_FILE_SIZE = 40 * GB

MULTIPART_THRESHOLD = 25 * MB
MAX_CONCURRENCY = 32

# Chunk sizes for files smaller than 20 GB
MULTIPART_CHUNKSIZE_SMALL_FILE_SIZE = IO_CHUNKSIZE_SMALL_FILE_SIZE = 25 * MB

# Chunk sizes for files between 20 GB and 40 GB
MULTIPART_CHUNKSIZE_MEDIUM_FILE_SIZE = IO_CHUNKSIZE_MEDIUM_FILE_SIZE = 50 * MB

# Chunk sizes for files larger than 40 GB
MULTIPART_CHUNKSIZE_LARGE_FILE_SIZE = IO_CHUNKSIZE_LARGE_FILE_SIZE = 100 * MB

# S3 Exceptions
EXPIRED_TOKEN = "ExpiredToken"

# cohort constants
COHORT_VERSION = "0.2"
COHORT_CONSTANTS_URL = "https://elucidatainc.github.io/PublicAssets/cohort_constants.txt"
REPORT_FIELDS_URL = (
    "https://elucidatainc.github.io/PublicAssets/report_fields.txt"
)
# Metadata field names treated as obsolete
OBSOLETE_METADATA_FIELDS = [
    "package", "region", "bucket", "key",
    "file_type", "file_location", "src_uri", "timestamp_",
]
dot = "."

# NOTE(review): the dicts below are module-level mutable objects; callers
# presumably copy them before filling in values — verify against usage.

# Payload skeleton used when requesting upload URLs; "folder" starts empty.
GETTING_UPLOAD_URLS_PAYLOAD = {
    "data": {
        "type": "files",
        "attributes": {"folder": ""},
    },
}

# Ingestion-level metadata skeleton.
# NOTE(review): "ignore" and "urgent" are the strings "false"/"true" while
# "v1_infra" is a real bool; presumably this is what the consumer expects —
# confirm before normalising.
INGESTION_LEVEL_METADATA = {
    "id": "metadata/ingestion",
    "type": "ingestion_metadata",
    "attributes": {
        "ignore": "false",
        "urgent": "true",
        "v1_infra": False,
        "priority": "low",
    },
}

METADATA = {"data": []}

GET_SCHEMA_RETURN_TYPE_VALS = ["dataframe", "dict"]

COMBINED_METADATA_FILE_NAME = "combined_metadata.json"

FILES_PATH_FORMAT = {
    "metadata": "<metadata_path>",
    "data": "<data_path>",
}

INGESTION_FILES_PATH_DIR_NAMES = ["metadata", "data"]

FORMATTED_METADATA = {
    "id": "",
    "type": "",
    "attributes": {},
}

FILE_FORMAT_CONSTANTS_URL = "https://elucidatainc.github.io/PublicAssets/file_format_constants.txt"

# File-name suffixes of compressed/archive formats.
# Each suffix must be its own element: the original wrote `".br," ".bz2"`,
# which Python concatenates into the single string ".br,.bz2", so neither
# ".br" nor ".bz2" was actually in the list.
COMPRESSION_TYPES = [
    ".br",
    ".bz2",
    ".bz",
    ".gz",
    ".lz",
    ".lz4",
    ".sz",
    ".rz",
    ".xz",
    ".zip",
    ".tar",
    ".bgz",
]

# File extensions grouped under the "data" and "metadata" keys
FILE_FORMAT_CONSTANTS = {
    "data": [
        ".gct", ".vcf", ".h5ad", ".mmcif", ".h5seurat", ".biom",
        ".zip", ".fs", ".tar.gz", ".fcs", ".vcf.bgz", ".gct.bz",
    ],
    "metadata": [".json", ".jpco"],
}

# Schema field names flagged as not needed
NOT_NEEDED_SCHEMA_FIELDS = [
    "is_current", "data_table_version", "data_table_name", "timestamp_",
]

OMIXATLAS_CATEGORY_VALS = ["private", "public", "diy_poa"]

VALIDATION_LEVEL_CONSTANTS = {
    "advanced": "value",
    "basic": "schema",
}

# NOTE(review): this is a github.com "/blob/" page URL rather than a
# raw-content URL (compare BASE_TEST_META_CONST); confirm callers expect that.
BASE_TEST_FORMAT_CONSTANTS_URL = (
    "https://github.com/ElucidataInc/PublicAssets/blob/master/"
    "internal-user/add_dataset_test_file"
)

BASE_TEST_META_CONST = (
    "https://raw.githubusercontent.com/ElucidataInc/PublicAssets/master/"
    "internal-user/add_dataset_test_file"
)

ELUCIDATA_LOGO_URL = (
    "https://elucidatainc.github.io/PublicAssets/"
    "dashboardFrontend/elucidata-logo.svg"
)

# Success message for a recorded schema-update job.
# Fixes two text defects in the original: a missing space after "changes,"
# at a string-concatenation boundary, and the typo "for you data".
SCHEMA_WRITE_SUCCESS = (
    "Job for your schema update has been recorded. Depending on the changes, "
    "minor updates or complete reindexing will be performed for your data. "
    "The repository will be locked for the duration of this update. "
    "Please refer to the ingestion monitoring dashboard for visibility on this process."
)

# Success message for a schema insert
SCHEMA_INSERT_SUCCESS = (
    "Schema has been Inserted. "
    "Please use get_schema functionality to get the inserted schema."
)


REPORT_GENERATION_SUPPORTED_REPOS = ["geo"]

FIELD_NAME_LOC = "field_name"

# SQL keywords for DDL statements (upper case). Presumably the reserved words
# that schema field names are checked against — verify against callers.
# The original list contained "CASHE", a misspelling of the reserved keyword
# "CACHE"; it is corrected here so that "CACHE" is actually recognised.
DDL_CONST_LIST = [
    "ALL",
    "ALTER",
    "AND",
    "ARRAY",
    "AS",
    "AUTHORIZATION",
    "BETWEEN",
    "BIGINT",
    "BINARY",
    "BOOLEAN",
    "BOTH",
    "BY",
    "CASE",
    "CACHE",
    "CAST",
    "CHAR",
    "COLUMN",
    "CONF",
    "CONSTRAINT",
    "COMMIT",
    "CREATE",
    "CROSS",
    "CUBE",
    "CURRENT",
    "CURRENT_DATE",
    "CURRENT_TIMESTAMP",
    "CURSOR",
    "DATABASE",
    "DATE",
    "DAYOFWEEK",
    "DECIMAL",
    "DELETE",
    "DESCRIBE",
    "DISTINCT",
    "DOUBLE",
    "DROP",
    "ELSE",
    "END",
    "EXCHANGE",
    "EXISTS",
    "EXTENDED",
    "EXTERNAL",
    "EXTRACT",
    "FALSE",
    "FETCH",
    "FLOAT",
    "FLOOR",
    "FOLLOWING",
    "FOR",
    "FOREIGN",
    "FROM",
    "FULL",
    "FUNCTION",
    "GRANT",
    "GROUP",
    "GROUPING",
    "HAVING",
    "IF",
    "IMPORT",
    "IN",
    "INNER",
    "INSERT",
    "INT",
    "INTEGER",
    "INTERSECT",
    "INTERVAL",
    "INTO",
    "IS",
    "JOIN",
    "LATERAL",
    "LEFT",
    "LESS",
    "LIKE",
    "LOCAL",
    "MACRO",
    "MAP",
    "MORE",
    "NONE",
    "NOT",
    "NULL",
    "NUMERIC",
    "OF",
    "ON",
    "ONLY",
    "OR",
    "ORDER",
    "OUT",
    "OUTER",
    "OVER",
    "PARTIALSCAN",
    "PARTITION",
    "PERCENT",
    "PRECEDING",
    "PRECISION",
    "PRESERVE",
    "PRIMARY",
    "PROCEDURE",
    "RANGE",
    "READS",
    "REDUCE",
    "REGEXP",
    "REFERENCES",
    "REVOKE",
    "RIGHT",
    "RLIKE",
    "ROLLBACK",
    "ROLLUP",
    "ROW",
    "ROWS",
    "SELECT",
    "SET",
    "SMALLINT",
    "START",
    "TABLE",
    "TABLESAMPLE",
    "THEN",
    "TIME",
    "TIMESTAMP",
    "TO",
    "TRANSFORM",
    "TRIGGER",
    "TRUE",
    "TRUNCATE",
    "UNBOUNDED",
    "UNION",
    "UNIQUEJOIN",
    "UPDATE",
    "USER",
    "USING",
    "UTC_TIMESTAMP",
    "VALUES",
    "VARCHAR",
    "VIEWS",
    "WHEN",
    "WHERE",
    "WINDOW",
    "WITH",
]

# SQL keywords for DML statements (upper case), in alphabetical order.
# Written as whitespace-separated words, one source line per initial letter,
# and split into a list of individual keyword strings.
DML_CONST_LIST = (
    "ALTER AND AS "
    "BETWEEN BY "
    "CASE CAST CONSTRAINT CREATE CROSS CUBE "
    "CURRENT_DATE CURRENT_PATH CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER "
    "DEALLOCATE DELETE DESCRIBE DISTINCT DROP "
    "ELSE END ESCAPE EXCEPT EXECUTE EXISTS EXTRACT "
    "FALSE FIRST FOR FROM FULL "
    "GROUP GROUPING "
    "HAVING "
    "IN INNER INSERT INTERSECT INTO IS "
    "JOIN "
    "LAST LEFT LIKE LOCALTIME LOCALTIMESTAMP "
    "NATURAL NORMALIZE NOT NULL "
    "OF ON OR ORDER OUTER "
    "PREPARE "
    "RECURSIVE RIGHT ROLLUP "
    "SELECT "
    "TABLE THEN TRUE "
    "UNESCAPE UNION UNNEST USING "
    "VALUES "
    "WHEN WHERE WITH"
).split()

# curation_library constants
SUPPORTED_ENTITY_TYPES = [
    "disease", "drug", "species", "tissue",
    "cell_type", "cell_line", "gene",
]

# Relative cache directory path (note the trailing slash)
CURATION_COHORT_CACHE = "./.cache/"

# Base URL under which the schema-validation JSON files live
SCHEMA_VALIDATION_BASE_URL = (
    "https://raw.githubusercontent.com/ElucidataInc/PublicAssets/master/"
    "internal-user/schema_validation"
)

# Case name -> full URL of its JSON file under SCHEMA_VALIDATION_BASE_URL.
# Several keys and file names are spelled differently (e.g. "grtr" vs "gtr",
# "keyword" vs "keywrd"); both sides are kept exactly as they were.
SCHEMA_VALIDATION = {
    case_name: f"{SCHEMA_VALIDATION_BASE_URL}/{json_file}"
    for case_name, json_file in (
        ("empty_repo_id", "demo_oa_2_empty_repo_id.json"),
        ("missing_repo_id", "demo_oa_2_missing_repo_id.json"),
        ("missing_schema_key", "demo_oa_2_missing_schema_key.json"),
        ("wrong_repo_id", "demo_oa_2_wrong_repo_id.json"),
        ("field_name_cap", "demo_oa_2_field_name_capital.json"),
        ("field_name_underscore", "demo_oa_2_field_name_with_underscore.json"),
        ("field_name_resv_keyword", "demo_oa_2_field_name_with_resv_keyword.json"),
        ("original_name_empty", "demo_oa_2_original_name_empty.json"),
        ("original_name_grtr_50", "demo_oa_2_original_name_gtr_50.json"),
        ("type_cosco", "demo_oa_2_type_cosco.json"),
        ("is_arr_str", "demo_oa_2_is_arr_str.json"),
        ("is_keyword_str", "demo_oa_2_is_keywrd_str.json"),
        ("filter_size_less", "demo_oa_2_filter_size_less.json"),
        ("filter_size_greater", "demo_oa_2_filter_size_greater.json"),
        ("display_name_empty", "demo_oa_2_display_name_empty.json"),
        ("display_name_grtr_50", "demo_oa_2_display_name_grtr_50.json"),
        ("is_keywrd_is_filter", "demo_oa_2_is_keywrd_is_filter.json"),
        ("is_keywrd_is_ontology", "demo_oa_2_field_name_is_filter_is_ontology.json"),
        ("positive_case", "positive_case.json"),
        ("original_name_int", "original_name_int.json"),
        ("filter_size_str", "filter_size_str.json"),
    )
}

MIN_REQUIRED_KEYS_FOR_JOBS = [
    "cpu",
    "memory",
    "machineType",
    "image",
    "tag",
    "name",
]

# Machine type name -> human-readable description string.
# NOTE(review): the trailing comma after the dict makes this a ONE-ELEMENT
# TUPLE wrapping the dict, not the dict itself (the mapping is
# MACHINES_FOR_JOBS[0]). This looks unintentional, but it is kept as-is
# because callers may rely on it — confirm before unwrapping.
MACHINES_FOR_JOBS = ({
    # general purpose
    "gp": "4 vCPU, 16GB RAM",
    # "ci" family
    "ci2xlarge": "16 vCPU, 32GB RAM",
    "ci3xlarge": "36 vCPU, 72GB RAM",
    # "mi" family
    "mi2xlarge": "4 vCPU, 32GB RAM",
    "mi3xlarge": "8 vCPU, 64GB RAM",
    "mi4xlarge": "16 vCPU, 122GB RAM",
    "mi5xlarge": "32 vCPU, 250GB RAM",
    "mi6xlarge": "64 vCPU, 500GB RAM",
    "mi7xlarge": "64 vCPU, 970GB RAM",
    # "mix" family (these descriptions have no space before "vCPU")
    "mix5xlarge": "16vCPU, 512GB RAM",
    "mix6xlarge": "24vCPU, 768GB RAM",
    "mix7xlarge": "64vCPU, 1024GB RAM",
    # GPU machines
    "gpusmall": "1 GPU, 4 vCPU, 16GB RAM",
    "gpumedium": "4 GPU, 32 vCPU, 240GB RAM",
    "gpularge": "8 GPU, 64 vCPU, 480GB RAM",
    "gpuxlarge": "8 GPU, 96 vCPU, 760GB RAM",
},)

# Presumably the accepted values for a data-log "mode" option — verify against callers.
DATA_LOG_MODES = ["latest", "all"]
# NOTE(review): credential-like token committed to source. If this is more
# than a public project token, it should be loaded from configuration or the
# environment and rotated — confirm with the owners of the analytics account.
MIXPANEL_KEY = "91fa77fcf07f7b672b5c5c6c09d8a14c"
