import re

from typing import List
from nltk.tokenize import WordPunctTokenizer

from dingo.model.model import Model
from dingo.model.modelres import ModelRes
from dingo.model.rule.base import BaseRule
from dingo.model.rule.common.xyz_head_word import get_xyz_head_word
from dingo.io import MetaData

@Model.rule_register(
    'QUALITY_INEFFECTIVENESS',
    ['xyz_ar', 'xyz_ko', 'xyz_ru', 'xyz_th', 'xyz_vi', 'xyz_cs', 'xyz_hu', 'xyz_sr'],
)
class XyzShortContent(BaseRule):
    """Flag content that contains fewer than five alphabetic tokens."""

    @classmethod
    def eval(cls, input_data: MetaData) -> ModelRes:
        """Count the purely alphabetic tokens of the content and flag short input.

        Args:
            input_data: Record whose ``content`` text is tokenized.

        Returns:
            A ``ModelRes``; its error fields are filled in only when fewer
            than five alphabetic tokens are found.
        """
        result = ModelRes()
        # Only tokens for which ``str.isalpha()`` holds count as words, so
        # numeric and punctuation tokens do not contribute to the total.
        alpha_count = sum(
            1
            for token in WordPunctTokenizer().tokenize(input_data.content)
            if token.isalpha()
        )
        if alpha_count >= 5:
            return result

        result.error_status = True
        result.error_type = 'QUALITY_INEFFECTIVENESS'
        result.error_name = cls.__name__
        result.error_reason = 'Content is too short.'
        return result

@Model.rule_register('QUALITY_IRRELEVANCE', ['xyz_ar'])
class XyzArHeadWord(BaseRule):
    """Check whether the tail of ar content contains irrelevant source info."""

    @classmethod
    def eval(cls, input_data: MetaData) -> ModelRes:
        """Search the last 100 characters of the content for ar head words.

        Args:
            input_data: Record whose ``content`` text is inspected.

        Returns:
            A ``ModelRes``; its error fields are filled in only when one of
            the keywords matches inside the content tail.
        """
        res = ModelRes()
        # Keywords are joined unescaped, so each one acts as a regex
        # alternative (presumably intended -- confirm against
        # get_xyz_head_word). Empty entries are dropped: an empty alternative,
        # or an empty keyword list, yields a pattern that matches any text and
        # would flag every document.
        pattern = "|".join(word for word in get_xyz_head_word("ar") if word)
        content_tail = input_data.content[-100:]
        # One hit is enough, so stop at the first match instead of collecting
        # every occurrence.
        if pattern and re.search(pattern, content_tail):
            res.error_status = True
            res.error_type = 'QUALITY_IRRELEVANCE'
            res.error_name = cls.__name__
            res.error_reason = 'Content has irrelevance tail source info.'
        return res


@Model.rule_register('QUALITY_IRRELEVANCE', ['xyz_ru'])
class XyzRuHeadWord(BaseRule):
    """Check whether the tail of ru content contains irrelevant source info."""

    @classmethod
    def eval(cls, input_data: MetaData) -> ModelRes:
        """Search the last 100 characters of the content for ru head words.

        Args:
            input_data: Record whose ``content`` text is inspected.

        Returns:
            A ``ModelRes``; its error fields are filled in only when one of
            the keywords matches inside the content tail.
        """
        res = ModelRes()
        # Keywords are joined unescaped, so each one acts as a regex
        # alternative (presumably intended -- confirm against
        # get_xyz_head_word). Empty entries are dropped: an empty alternative,
        # or an empty keyword list, yields a pattern that matches any text and
        # would flag every document.
        pattern = "|".join(word for word in get_xyz_head_word("ru") if word)
        content_tail = input_data.content[-100:]
        # One hit is enough, so stop at the first match instead of collecting
        # every occurrence.
        if pattern and re.search(pattern, content_tail):
            res.error_status = True
            res.error_type = 'QUALITY_IRRELEVANCE'
            res.error_name = cls.__name__
            res.error_reason = 'Content has irrelevance tail source info.'
        return res


@Model.rule_register('QUALITY_IRRELEVANCE', ['xyz_ko'])
class XyzKoHeadWord(BaseRule):
    """Check whether the tail of ko content contains irrelevant source info."""

    @classmethod
    def eval(cls, input_data: MetaData) -> ModelRes:
        """Search the last 100 characters of the content for ko head words.

        Args:
            input_data: Record whose ``content`` text is inspected.

        Returns:
            A ``ModelRes``; its error fields are filled in only when one of
            the keywords matches inside the content tail.
        """
        res = ModelRes()
        # Keywords are joined unescaped, so each one acts as a regex
        # alternative (presumably intended -- confirm against
        # get_xyz_head_word). Empty entries are dropped: an empty alternative,
        # or an empty keyword list, yields a pattern that matches any text and
        # would flag every document.
        pattern = "|".join(word for word in get_xyz_head_word("ko") if word)
        content_tail = input_data.content[-100:]
        # One hit is enough, so stop at the first match instead of collecting
        # every occurrence.
        if pattern and re.search(pattern, content_tail):
            res.error_status = True
            res.error_type = 'QUALITY_IRRELEVANCE'
            res.error_name = cls.__name__
            res.error_reason = 'Content has irrelevance tail source info.'
        return res


@Model.rule_register('QUALITY_IRRELEVANCE', ['xyz_th'])
class XyzThHeadWord(BaseRule):
    """Check whether the tail of th content contains irrelevant source info."""

    @classmethod
    def eval(cls, input_data: MetaData) -> ModelRes:
        """Search the last 100 characters of the content for th head words.

        Args:
            input_data: Record whose ``content`` text is inspected.

        Returns:
            A ``ModelRes``; its error fields are filled in only when one of
            the keywords matches inside the content tail.
        """
        res = ModelRes()
        # Keywords are joined unescaped, so each one acts as a regex
        # alternative (presumably intended -- confirm against
        # get_xyz_head_word). Empty entries are dropped: an empty alternative,
        # or an empty keyword list, yields a pattern that matches any text and
        # would flag every document.
        pattern = "|".join(word for word in get_xyz_head_word("th") if word)
        content_tail = input_data.content[-100:]
        # One hit is enough, so stop at the first match instead of collecting
        # every occurrence.
        if pattern and re.search(pattern, content_tail):
            res.error_status = True
            res.error_type = 'QUALITY_IRRELEVANCE'
            res.error_name = cls.__name__
            res.error_reason = 'Content has irrelevance tail source info.'
        return res


@Model.rule_register('QUALITY_IRRELEVANCE', ['xyz_vi'])
class XyzViHeadWord(BaseRule):
    """Check whether the tail of vi content contains irrelevant source info."""

    @classmethod
    def eval(cls, input_data: MetaData) -> ModelRes:
        """Search the last 100 characters of the content for vi head words.

        Args:
            input_data: Record whose ``content`` text is inspected.

        Returns:
            A ``ModelRes``; its error fields are filled in only when one of
            the keywords matches inside the content tail.
        """
        res = ModelRes()
        # Keywords are joined unescaped, so each one acts as a regex
        # alternative (presumably intended -- confirm against
        # get_xyz_head_word). Empty entries are dropped: an empty alternative,
        # or an empty keyword list, yields a pattern that matches any text and
        # would flag every document.
        pattern = "|".join(word for word in get_xyz_head_word("vi") if word)
        content_tail = input_data.content[-100:]
        # One hit is enough, so stop at the first match instead of collecting
        # every occurrence.
        if pattern and re.search(pattern, content_tail):
            res.error_status = True
            res.error_type = 'QUALITY_IRRELEVANCE'
            res.error_name = cls.__name__
            res.error_reason = 'Content has irrelevance tail source info.'
        return res


@Model.rule_register('QUALITY_IRRELEVANCE', ['xyz_cs'])
class XyzCsHeadWord(BaseRule):
    """Check whether the tail of cs content contains irrelevant source info."""

    @classmethod
    def eval(cls, input_data: MetaData) -> ModelRes:
        """Search the last 100 characters of the content for cs head words.

        Args:
            input_data: Record whose ``content`` text is inspected.

        Returns:
            A ``ModelRes``; its error fields are filled in only when one of
            the keywords matches inside the content tail.
        """
        res = ModelRes()
        # Keywords are joined unescaped, so each one acts as a regex
        # alternative (presumably intended -- confirm against
        # get_xyz_head_word). Empty entries are dropped: an empty alternative,
        # or an empty keyword list, yields a pattern that matches any text and
        # would flag every document.
        pattern = "|".join(word for word in get_xyz_head_word("cs") if word)
        content_tail = input_data.content[-100:]
        # One hit is enough, so stop at the first match instead of collecting
        # every occurrence.
        if pattern and re.search(pattern, content_tail):
            res.error_status = True
            res.error_type = 'QUALITY_IRRELEVANCE'
            res.error_name = cls.__name__
            res.error_reason = 'Content has irrelevance tail source info.'
        return res


@Model.rule_register('QUALITY_IRRELEVANCE', ['xyz_hu'])
class XyzHuHeadWord(BaseRule):
    """Check whether the tail of hu content contains irrelevant source info."""

    @classmethod
    def eval(cls, input_data: MetaData) -> ModelRes:
        """Search the last 100 characters of the content for hu head words.

        Args:
            input_data: Record whose ``content`` text is inspected.

        Returns:
            A ``ModelRes``; its error fields are filled in only when one of
            the keywords matches inside the content tail.
        """
        res = ModelRes()
        # Keywords are joined unescaped, so each one acts as a regex
        # alternative (presumably intended -- confirm against
        # get_xyz_head_word). Empty entries are dropped: an empty alternative,
        # or an empty keyword list, yields a pattern that matches any text and
        # would flag every document.
        pattern = "|".join(word for word in get_xyz_head_word("hu") if word)
        content_tail = input_data.content[-100:]
        # One hit is enough, so stop at the first match instead of collecting
        # every occurrence.
        if pattern and re.search(pattern, content_tail):
            res.error_status = True
            res.error_type = 'QUALITY_IRRELEVANCE'
            res.error_name = cls.__name__
            res.error_reason = 'Content has irrelevance tail source info.'
        return res

@Model.rule_register('QUALITY_IRRELEVANCE', ['xyz_sr'])
class XyzSrHeadWord(BaseRule):
    """Check whether the tail of sr content contains irrelevant source info."""

    @classmethod
    def eval(cls, input_data: MetaData) -> ModelRes:
        """Search the last 100 characters of the content for sr head words.

        Args:
            input_data: Record whose ``content`` text is inspected.

        Returns:
            A ``ModelRes``; its error fields are filled in only when one of
            the keywords matches inside the content tail.
        """
        res = ModelRes()
        # Keywords are joined unescaped, so each one acts as a regex
        # alternative (presumably intended -- confirm against
        # get_xyz_head_word). Empty entries are dropped: an empty alternative,
        # or an empty keyword list, yields a pattern that matches any text and
        # would flag every document.
        pattern = "|".join(word for word in get_xyz_head_word("sr") if word)
        content_tail = input_data.content[-100:]
        # One hit is enough, so stop at the first match instead of collecting
        # every occurrence.
        if pattern and re.search(pattern, content_tail):
            res.error_status = True
            res.error_type = 'QUALITY_IRRELEVANCE'
            res.error_name = cls.__name__
            res.error_reason = 'Content has irrelevance tail source info.'
        return res