/*
 * Decompiled with CFR 0.152.
 */
package corenlp.rdrsegmenter;

import corenlp.rdrsegmenter.FWObject;
import corenlp.rdrsegmenter.Node;
import corenlp.rdrsegmenter.Utils;
import corenlp.rdrsegmenter.Vocabulary;
import corenlp.rdrsegmenter.WordTag;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * Segments a whitespace-tokenized sentence into words by joining the tokens
 * that belong to the same word with underscores.
 *
 * <p>Segmentation runs in two passes: {@link #getInitialSegmentation(String)}
 * assigns every token an initial tag ({@code "B"} or {@code "I"}) using
 * dictionary lookups and capitalisation heuristics, then a rule tree loaded
 * from a rules file may override the tag of each token.
 *
 * <p>In the output a token whose final tag is {@code "B"} is preceded by a
 * space; a token with any other tag is glued to the previous token with
 * {@code '_'}.
 */
public class WordSegmenter {
    /** Root of the rule tree built from the rules file. */
    private final Node root;
    /** Dictionary consulted during the initial segmentation. */
    private final Vocabulary vocabulary;

    /**
     * Creates a segmenter backed by the given rules file and vocabulary.
     *
     * @param modelPath  path of the rules file, read as UTF-8
     * @param vocabulary vocabulary used for multi-token dictionary lookups
     * @throws IOException if the rules file cannot be opened or read
     */
    public WordSegmenter(String modelPath, Vocabulary vocabulary) throws IOException {
        this.vocabulary = vocabulary;
        this.root = this.constructTreeFromRulesFile(modelPath);
    }

    /**
     * Builds the rule tree from a tab-indented rules file.
     *
     * <p>The first line of the file is skipped. For every following line the
     * number of leading tab characters is its depth; blank lines and lines
     * containing {@code "cc:"} are ignored. Each remaining line is expected to
     * have the form {@code condition : conclusion}. A deeper line becomes the
     * "except" child of the previous node; a line at the same or a shallower
     * depth becomes the "if-not" successor of the most recent node at that
     * depth.
     *
     * @param rulesFilePath path of the rules file
     * @return the synthetic root node (depth 0) of the tree
     * @throws IOException if the file cannot be opened or read
     */
    private Node constructTreeFromRulesFile(String rulesFilePath) throws IOException {
        // try-with-resources: the reader is closed even when parsing a line throws.
        try (BufferedReader buffer = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(rulesFilePath)), "UTF-8"))) {
            // The first line is not a rule; discard it.
            buffer.readLine();

            Node treeRoot = new Node(new FWObject(false), "NN", null, null, null, 0);
            Node currentNode = treeRoot;
            int currentDepth = 0;

            String line;
            while ((line = buffer.readLine()) != null) {
                // Count leading tabs. The bound check keeps empty and tab-only
                // lines from running past the end of the string.
                int depth = 0;
                while (depth < line.length() && line.charAt(depth) == '\t') {
                    ++depth;
                }

                line = line.trim();
                if (line.isEmpty() || line.contains("cc:")) {
                    continue;
                }

                String[] parts = line.split(" : ");
                FWObject condition = Utils.getCondition(parts[0].trim());
                String conclusion = Utils.getConcreteValue(parts[1].trim());
                Node node = new Node(condition, conclusion, null, null, null, depth);

                if (depth > currentDepth) {
                    currentNode.setExceptNode(node);
                } else {
                    // For a shallower line, climb back to the most recent node at
                    // the same depth; for an equal depth this loop is a no-op.
                    while (currentNode.getDepth() != depth) {
                        currentNode = currentNode.getFatherNode();
                    }
                    currentNode.setIfnotNode(node);
                }
                node.setFatherNode(currentNode);

                currentNode = node;
                currentDepth = depth;
            }
            return treeRoot;
        }
    }

    /**
     * Walks the rule tree and returns the last node whose condition the object
     * satisfies.
     *
     * <p>From a satisfied node the walk descends into its "except" node; from
     * an unsatisfied node it moves on to its "if-not" node. The walk stops when
     * the required successor is absent.
     *
     * @param object the feature object to match against node conditions
     * @return the last satisfied node, or {@code null} if no visited node was
     *         satisfied
     */
    private Node findFiredNode(FWObject object) {
        Node firedNode = null;
        Node currentNode = this.root;
        while (currentNode != null) {
            if (currentNode.satisfy(object)) {
                firedNode = currentNode;
                currentNode = currentNode.getExceptNode();
            } else {
                currentNode = currentNode.getIfnotNode();
            }
        }
        return firedNode;
    }

    /**
     * Produces the initial {@code "B"}/{@code "I"} tagging for a sentence.
     *
     * <p>The sentence is first rewritten with the literal substitutions in
     * {@code Utils.NORMALIZER} and split on whitespace. Tokens that are not
     * made up of letters only are always tagged {@code "B"}. For letter-only
     * tokens the method tries, in order:
     * <ol>
     *   <li>a lower-case token directly followed by an upper-case-initial
     *       token is tagged {@code "B"};</li>
     *   <li>the longest run of 2 to 4 tokens starting here whose lower-cased,
     *       space-joined form is found in the dictionaries is tagged
     *       {@code "B"} followed by {@code "I"}s;</li>
     *   <li>otherwise the token is handled as a single syllable, possibly
     *       starting a short run of capitalised tokens.</li>
     * </ol>
     *
     * @param sentence a non-empty sentence with no leading whitespace
     * @return one {@link WordTag} per token, in order
     */
    private List<WordTag> getInitialSegmentation(String sentence) {
        List<WordTag> wordtags = new ArrayList<WordTag>();

        // Literal replacement: the keys are plain strings, not regular
        // expressions, so String.replace is used rather than replaceAll.
        for (Map.Entry<String, String> entry : Utils.NORMALIZER.entrySet()) {
            sentence = sentence.replace(entry.getKey(), entry.getValue());
        }

        List<String> tokens = Arrays.asList(sentence.split("\\s+"));
        // Locale.ROOT keeps lower-casing independent of the platform locale.
        List<String> lowerTokens = Arrays.asList(sentence.toLowerCase(Locale.ROOT).split("\\s+"));
        int senLength = tokens.size();

        int i = 0;
        while (i < senLength) {
            String token = tokens.get(i);

            // Anything containing a non-letter stands alone.
            if (!token.chars().allMatch(Character::isLetter)) {
                wordtags.add(new WordTag(token, "B"));
                ++i;
                continue;
            }

            // A lower-case token right before a capitalised one stands alone.
            if (Character.isLowerCase(token.charAt(0))
                    && i + 1 < senLength
                    && Character.isUpperCase(tokens.get(i + 1).charAt(0))) {
                wordtags.add(new WordTag(token, "B"));
                ++i;
                continue;
            }

            // Longest-match dictionary lookup over windows of 4, 3, then 2 tokens.
            boolean matchedDictionary = false;
            for (int j = Math.min(i + 4, senLength); j > i + 1; --j) {
                String word = String.join(" ", lowerTokens.subList(i, j));
                if (!this.isKnownMultiTokenWord(word)) {
                    continue;
                }
                wordtags.add(new WordTag(token, "B"));
                for (int k = i + 1; k < j; ++k) {
                    wordtags.add(new WordTag(tokens.get(k), "I"));
                }
                i = j;
                matchedDictionary = true;
                break;
            }
            if (matchedDictionary) {
                continue;
            }

            // Single-syllable handling.
            String lowercasedToken = lowerTokens.get(i);
            if (Vocabulary.VN_FIRST_SENT_WORDS.contains(lowercasedToken)
                    || Character.isLowerCase(token.charAt(0))
                    || token.chars().allMatch(Character::isUpperCase)
                    || Vocabulary.COUNTRY_S_NAME.contains(lowercasedToken)
                    || Vocabulary.WORLD_COMPANY.contains(lowercasedToken)) {
                wordtags.add(new WordTag(token, "B"));
                ++i;
                continue;
            }

            // Extend over up to three following tokens that look like part of a
            // capitalised run.
            int runLimit = Math.min(i + 4, senLength);
            int ilower = i + 1;
            while (ilower < runLimit && continuesCapitalizedRun(tokens.get(ilower))) {
                ++ilower;
            }

            if (ilower > i + 1) {
                // A middle name that follows a capitalised family name continues
                // the previous word instead of starting a new one.
                boolean isMiddleName = false;
                if (i >= 1 && Vocabulary.VN_MIDDLE_NAMES.contains(lowercasedToken)) {
                    String prevT = tokens.get(i - 1);
                    isMiddleName = Character.isUpperCase(prevT.charAt(0))
                            && Vocabulary.VN_FAMILY_NAMES.contains(prevT.toLowerCase(Locale.ROOT));
                }
                wordtags.add(new WordTag(token, isMiddleName ? "I" : "B"));
                for (int k = i + 1; k < ilower; ++k) {
                    wordtags.add(new WordTag(tokens.get(k), "I"));
                }
                i = ilower;
            } else {
                wordtags.add(new WordTag(token, "B"));
                ++i;
            }
        }
        return wordtags;
    }

    /** Returns whether the lower-cased, space-joined phrase is in any multi-token dictionary. */
    private boolean isKnownMultiTokenWord(String word) {
        return this.vocabulary.VN_DICT.contains(word)
                || Vocabulary.VN_LOCATIONS.contains(word)
                || Vocabulary.COUNTRY_L_NAME.contains(word);
    }

    /**
     * Returns whether a token may extend a run of capitalised tokens: it must
     * not start with a lower-case letter, must consist of letters only, and
     * must not be one of the placeholders {@code "LBKT"} / {@code "RBKT"}.
     */
    private static boolean continuesCapitalizedRun(String token) {
        return !Character.isLowerCase(token.charAt(0))
                && token.chars().allMatch(Character::isLetter)
                && !token.equals("LBKT")
                && !token.equals("RBKT");
    }

    /**
     * Segments an already tokenized sentence.
     *
     * <p>Each token receives its initial tag, which is overridden by the
     * conclusion of the fired rule whenever that rule is deeper than the root
     * (depth greater than 0).
     *
     * @param str whitespace-separated tokens
     * @return the tokens separated by spaces, with tokens of the same word
     *         joined by {@code '_'}; the empty string if {@code str} is blank
     */
    public String segmentTokenizedString(String str) {
        String line = str.trim();
        if (line.isEmpty()) {
            return "";
        }

        List<WordTag> wordtags = this.getInitialSegmentation(line);
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < wordtags.size(); ++i) {
            WordTag wordtag = wordtags.get(i);
            FWObject object = Utils.getObject(wordtags, wordtags.size(), i);
            Node firedNode = this.findFiredNode(object);

            // Only a rule below the root overrides the initial tag. The null
            // check is defensive: findFiredNode returns null when not even the
            // root is satisfied.
            boolean startsWord = (firedNode != null && firedNode.getDepth() > 0)
                    ? "B".equals(firedNode.getConclusion())
                    : "B".equals(wordtag.tag);

            sb.append(startsWord ? ' ' : '_').append(wordtag.form);
        }
        return sb.toString().trim();
    }
}

