package edu.berkeley.nlp.treebank;

import edu.berkeley.nlp.tokenizer.TokenizerFactory;
import edu.berkeley.nlp.util.Filter;
import edu.berkeley.nlp.util.Filters;
import java.io.Serializable;

/* loaded from: input_file:edu/berkeley/nlp/treebank/ChineseTreebankLanguagePack.class */
/**
 * Treebank language pack for the Chinese treebank: declares the file encoding,
 * the punctuation tag, the punctuation word tables, the start symbol and the
 * treebank file extension, and exposes {@link Filter}s over the individual
 * punctuation categories.
 *
 * <p>All punctuation tables are static and shared. The array-returning
 * accessors hand out the shared arrays themselves (no copy is made), so
 * callers must treat the returned arrays as read-only.
 */
public class ChineseTreebankLanguagePack extends AbstractTreebankLanguagePack implements Serializable {
    private static final long serialVersionUID = 5757403475523638802L;

    /** Character encoding name reported by {@link #getEncoding()}. */
    public static final String ENCODING = "GB18030";

    /** The single part-of-speech tag this pack treats as punctuation. */
    private static final String PUNCTUATION_TAG = "PU";

    /** Set through {@link #setTokenizerFactory}; not read anywhere in this class. */
    private static TokenizerFactory tf;

    private static final char[] annotationIntroducingChars = {'-', '=', '|', '#', '^', '~'};
    private static final String[] startSymbols = {"ROOT"};
    private static final String[] tags = {PUNCTUATION_TAG};

    // Punctuation words, grouped by category.
    private static final String[] comma = {",", "，", "\u3000"};
    private static final String[] endSentence = {"。", "．", "！", "？", "?", "!", "."};
    private static final String[] douHao = {"、"};
    private static final String[] quoteMark = {"“", "”", "‘", "’", "《", "》", "『", "』", "〈", "〉", "「", "」", "＂", "＜", "＞", "`", "＇"};
    private static final String[] parenthesis = {"（", "）", "-LRB-", "-RRB-", "【", "】"};
    private static final String[] colon = {"：", "；", "∶", ":"};
    private static final String[] dash = {"…", "—", "——", "———", "－", "－－", "──", "━", "━━", "—－", "-", "----", "~", "……", "～"};
    private static final String[] other = {"·", "／", "／", "＊", "＆", "/", "//", "*"};

    // Opening / closing halves of the quote and parenthesis categories.
    private static final String[] leftQuoteMark = {"“", "‘", "《", "『", "〈", "「", "＜", "`"};
    private static final String[] rightQuoteMark = {"”", "’", "》", "』", "〉", "」", "＞", "＇"};
    private static final String[] leftParenthesis = {"（", "-LRB-", "【"};
    private static final String[] rightParenthesis = {"）", "-RRB-", "】"};

    /**
     * The punctuation tag followed by every punctuation word, category by
     * category. Must be declared after the tables above: static initializers
     * run in textual order.
     */
    private static final String[] punctWords =
            concatenate(tags, comma, endSentence, douHao, quoteMark, parenthesis, colon, dash, other);

    /**
     * Joins the given arrays, in argument order, into one newly allocated array.
     *
     * @param groups arrays to join; none may be null
     * @return a new array whose length is the sum of the group lengths
     */
    private static String[] concatenate(String[]... groups) {
        int total = 0;
        for (String[] group : groups) {
            total += group.length;
        }
        String[] joined = new String[total];
        int offset = 0;
        for (String[] group : groups) {
            System.arraycopy(group, 0, joined, offset, group.length);
            offset += group.length;
        }
        return joined;
    }

    /**
     * Stores the given tokenizer factory in a static (class-wide) field.
     *
     * @param tokenizerFactory factory to remember; stored as-is, null included
     */
    public static void setTokenizerFactory(TokenizerFactory tokenizerFactory) {
        tf = tokenizerFactory;
    }

    /**
     * @return {@link #ENCODING}, i.e. {@code "GB18030"}
     */
    @Override
    public String getEncoding() {
        return ENCODING;
    }

    /**
     * Tests whether {@code str} is the punctuation tag {@code "PU"}.
     *
     * @param str tag to test; {@code null} yields {@code false} instead of
     *            throwing
     * @return {@code true} only when {@code str} equals {@code "PU"}
     */
    @Override
    public boolean isPunctuationTag(String str) {
        // Constant-first comparison so a null tag is rejected, not dereferenced.
        return PUNCTUATION_TAG.equals(str);
    }

    /**
     * Tests {@code str} against the eight punctuation word categories (comma,
     * sentence end, dou hao, quote mark, parenthesis, colon, dash, other), in
     * that order, stopping at the first category filter that accepts it. The
     * tag table is not consulted.
     *
     * @param str word to test
     * @return {@code true} if any category filter accepts {@code str}
     */
    @Override
    public boolean isPunctuationWord(String str) {
        return chineseCommaAcceptFilter().accept(str)
                || chineseEndSentenceAcceptFilter().accept(str)
                || chineseDouHaoAcceptFilter().accept(str)
                || chineseQuoteMarkAcceptFilter().accept(str)
                || chineseParenthesisAcceptFilter().accept(str)
                || chineseColonAcceptFilter().accept(str)
                || chineseDashAcceptFilter().accept(str)
                || chineseOtherAcceptFilter().accept(str);
    }

    /**
     * Applies the sentence-end filter to {@code str}.
     *
     * <p>NOTE(review): despite the method name, this tests {@code str} against
     * the sentence-final <em>word</em> table, not against
     * {@link #sentenceFinalPunctuationTags()}. Left unchanged because callers
     * may depend on it; confirm before altering.
     *
     * @param str string to test
     * @return whatever {@link #chineseEndSentenceAcceptFilter()} returns for
     *         {@code str}
     */
    @Override
    public boolean isSentenceFinalPunctuationTag(String str) {
        return chineseEndSentenceAcceptFilter().accept(str);
    }

    /**
     * @return the shared punctuation tag array ({@code {"PU"}}); do not modify
     */
    @Override
    public String[] punctuationTags() {
        return tags;
    }

    /**
     * @return the shared combined array: the punctuation tag followed by all
     *         punctuation words; do not modify
     */
    @Override
    public String[] punctuationWords() {
        return punctWords;
    }

    /**
     * @return the same shared array as {@link #punctuationTags()}; do not modify
     */
    @Override
    public String[] sentenceFinalPunctuationTags() {
        return tags;
    }

    /**
     * @return the shared array of sentence-ending punctuation words; do not
     *         modify
     */
    @Override
    public String[] sentenceFinalPunctuationWords() {
        return endSentence;
    }

    /**
     * Tests {@code str} with a collection filter built over the punctuation
     * tag table.
     *
     * @param str tag to test
     * @return the filter's verdict for {@code str}
     */
    @Override
    public boolean isEvalBIgnoredPunctuationTag(String str) {
        return Filters.collectionAcceptFilter(tags).accept(str);
    }

    /**
     * @return the shared array {@code - = | # ^ ~}; do not modify
     */
    @Override
    public char[] labelAnnotationIntroducingCharacters() {
        return annotationIntroducingChars;
    }

    /**
     * @return the shared start symbol array ({@code {"ROOT"}}); do not modify
     */
    @Override
    public String[] startSymbols() {
        return startSymbols;
    }

    // Each factory below builds a new filter per call via
    // Filters.collectionAcceptFilter over one table. Presumably the filter
    // accepts exactly the strings in that table - confirm against Filters.

    /** @return a filter built over the comma table */
    public static Filter<String> chineseCommaAcceptFilter() {
        return Filters.collectionAcceptFilter(comma);
    }

    /** @return a filter built over the sentence-end table */
    public static Filter<String> chineseEndSentenceAcceptFilter() {
        return Filters.collectionAcceptFilter(endSentence);
    }

    /** @return a filter built over the dou hao table */
    public static Filter<String> chineseDouHaoAcceptFilter() {
        return Filters.collectionAcceptFilter(douHao);
    }

    /** @return a filter built over the quote mark table */
    public static Filter<String> chineseQuoteMarkAcceptFilter() {
        return Filters.collectionAcceptFilter(quoteMark);
    }

    /** @return a filter built over the parenthesis table */
    public static Filter<String> chineseParenthesisAcceptFilter() {
        return Filters.collectionAcceptFilter(parenthesis);
    }

    /** @return a filter built over the colon table */
    public static Filter<String> chineseColonAcceptFilter() {
        return Filters.collectionAcceptFilter(colon);
    }

    /** @return a filter built over the dash table */
    public static Filter<String> chineseDashAcceptFilter() {
        return Filters.collectionAcceptFilter(dash);
    }

    /** @return a filter built over the table of other punctuation */
    public static Filter<String> chineseOtherAcceptFilter() {
        return Filters.collectionAcceptFilter(other);
    }

    /** @return a filter built over the left parenthesis table */
    public static Filter<String> chineseLeftParenthesisAcceptFilter() {
        return Filters.collectionAcceptFilter(leftParenthesis);
    }

    /** @return a filter built over the right parenthesis table */
    public static Filter<String> chineseRightParenthesisAcceptFilter() {
        return Filters.collectionAcceptFilter(rightParenthesis);
    }

    /** @return a filter built over the left quote mark table */
    public static Filter<String> chineseLeftQuoteMarkAcceptFilter() {
        return Filters.collectionAcceptFilter(leftQuoteMark);
    }

    /** @return a filter built over the right quote mark table */
    public static Filter<String> chineseRightQuoteMarkAcceptFilter() {
        return Filters.collectionAcceptFilter(rightQuoteMark);
    }

    /**
     * @return {@code "fid"}
     */
    @Override
    public String treebankFileExtension() {
        return "fid";
    }
}
