package edu.stanford.nlp.trees.international.arabic;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.PTBLexer;
import edu.stanford.nlp.trees.AbstractTreebankLanguagePack;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.TreeReaderFactory;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/trees/international/arabic/ArabicTreebankLanguagePack.class */
/**
 * Treebank language pack for Arabic treebanks: supplies the punctuation tag/word
 * tables, start symbols, label-annotation characters, tokenizer factory, tree
 * reader factory and head finder used for this language.
 *
 * <p>Optionally, labels beginning with {@code DET+NOUN} can be collapsed to the
 * single basic category {@code "DET+NOUN"} (see {@link #basicCategory(String)}).
 *
 * <p>NOTE(review): the array-returning accessors hand out the shared static arrays
 * themselves, not copies; callers must not modify them.
 */
public class ArabicTreebankLanguagePack extends AbstractTreebankLanguagePack {
    private static final long serialVersionUID = 9081305982861675328L;

    /** Basic category reported for labels that start with {@code DET+NOUN}. */
    static final String detPlusNoun = "DET+NOUN";

    /** Matches labels whose text begins with the literal {@code DET+NOUN}. */
    private static final Pattern detPlusNounPattern = Pattern.compile("^DET\\+NOUN");

    private static final String[] pennPunctTags = {"PUNC", ","};
    private static final String[] pennSFPunctTags = {"."};
    private static final String[] collinsPunctTags = {"PUNC"};
    private static final String[] pennPunctWords = {PTBLexer.closedblquote, "'", PTBLexer.opendblquote, "`", PTBLexer.openparen, PTBLexer.closeparen, PTBLexer.openbrace, PTBLexer.closebrace, "-PLUS-", ".", "!", ",", "-", PTBLexer.ptbmdash, PTBLexer.ptb3EllipsisStr, "%", "&", "\"", "\"__", "*", "+", "=", "-", "-RRB-_", "-RRB-__", "-_", "-__", "_", "..", PTBLexer.ptb3EllipsisStr, "......", "/", "\\", ":", ":_", ":__", ";", "?\"", "?\".", "?", "?.", ">"};
    private static final String[] pennSFPunctWords = {".", "!", "?", "?\"", "?\".", "?", "?."};
    private static final char[] annotationIntroducingChars = {'-', '=', '|', '#', '^', '~'};
    private static final String[] pennStartSymbols = {"ROOT"};

    /**
     * Tokenizer factory shared by every instance of this class. It is static, so it
     * is not part of an instance's serialized state. Lazily defaulted in
     * {@link #getTokenizerFactory()}.
     */
    private static TokenizerFactory<? extends HasWord> tf;

    /** When true, labels starting with {@code DET+NOUN} map to that basic category. */
    private final boolean detPlusNounIsBasicCategory;

    /** Creates a language pack that does not special-case {@code DET+NOUN} labels. */
    public ArabicTreebankLanguagePack() {
        this(false);
    }

    /**
     * Creates a language pack.
     *
     * @param z whether labels starting with {@code DET+NOUN} should be reported as
     *          the basic category {@code "DET+NOUN"}
     */
    public ArabicTreebankLanguagePack(boolean z) {
        this.detPlusNounIsBasicCategory = z;
    }

    /** @return the punctuation tags: {@code "PUNC"} and {@code ","} */
    @Override // edu.stanford.nlp.trees.AbstractTreebankLanguagePack, edu.stanford.nlp.trees.TreebankLanguagePack
    public String[] punctuationTags() {
        return pennPunctTags;
    }

    /** @return the table of word forms treated as punctuation */
    @Override // edu.stanford.nlp.trees.AbstractTreebankLanguagePack, edu.stanford.nlp.trees.TreebankLanguagePack
    public String[] punctuationWords() {
        return pennPunctWords;
    }

    /** @return the sentence-final punctuation tags: {@code "."} */
    @Override // edu.stanford.nlp.trees.AbstractTreebankLanguagePack, edu.stanford.nlp.trees.TreebankLanguagePack
    public String[] sentenceFinalPunctuationTags() {
        return pennSFPunctTags;
    }

    /** @return the table of word forms treated as sentence-final punctuation */
    @Override // edu.stanford.nlp.trees.TreebankLanguagePack
    public String[] sentenceFinalPunctuationWords() {
        return pennSFPunctWords;
    }

    /** @return the punctuation tags ignored by EVALB-style scoring: {@code "PUNC"} */
    @Override // edu.stanford.nlp.trees.AbstractTreebankLanguagePack, edu.stanford.nlp.trees.TreebankLanguagePack
    public String[] evalBIgnoredPunctuationTags() {
        return collinsPunctTags;
    }

    /** @return the characters that introduce an annotation on a category label */
    @Override // edu.stanford.nlp.trees.AbstractTreebankLanguagePack, edu.stanford.nlp.trees.TreebankLanguagePack
    public char[] labelAnnotationIntroducingCharacters() {
        return annotationIntroducingChars;
    }

    /** @return the accepted start symbols: {@code "ROOT"} */
    @Override // edu.stanford.nlp.trees.AbstractTreebankLanguagePack, edu.stanford.nlp.trees.TreebankLanguagePack
    public String[] startSymbols() {
        return pennStartSymbols;
    }

    /**
     * Replaces the tokenizer factory. The factory is held in a static field, so the
     * change is visible through every instance of this class, not just this one.
     *
     * @param tokenizerFactory the factory to use; passing {@code null} makes the next
     *                         {@link #getTokenizerFactory()} call restore the default
     */
    public void setTokenizerFactory(TokenizerFactory<? extends HasWord> tokenizerFactory) {
        tf = tokenizerFactory;
    }

    /**
     * Returns the shared tokenizer factory, installing {@code ArabicTokenizer.factory()}
     * first if none has been set. The lazy initialization is not synchronized.
     *
     * @return the current tokenizer factory
     */
    @Override // edu.stanford.nlp.trees.AbstractTreebankLanguagePack, edu.stanford.nlp.trees.TreebankLanguagePack
    public TokenizerFactory<? extends HasWord> getTokenizerFactory() {
        if (tf == null) {
            tf = ArabicTokenizer.factory();
        }
        return tf;
    }

    /** @return the treebank file extension, {@code "tree"} */
    @Override // edu.stanford.nlp.trees.TreebankLanguagePack
    public String treebankFileExtension() {
        return "tree";
    }

    /** @return a new {@code ArabicTreeReaderFactory} */
    @Override // edu.stanford.nlp.trees.AbstractTreebankLanguagePack, edu.stanford.nlp.trees.TreebankLanguagePack
    public TreeReaderFactory treeReaderFactory() {
        return new ArabicTreeReaderFactory();
    }

    /**
     * Small self-test that prints the start symbol and the basic / category-and-function
     * forms of a few sample labels.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        // Exercise this class; the previous version instantiated PennTreebankLanguagePack,
        // so none of the Arabic-specific tables were ever tested.
        ArabicTreebankLanguagePack tlp = new ArabicTreebankLanguagePack();
        System.out.println("Start symbol: " + tlp.startSymbol());
        System.out.println("Should be true: " + tlp.isStartSymbol(tlp.startSymbol()));
        for (String str : new String[]{"-", "-LLB-", "NP-2", "NP=3", "NP-LGS", "NP-TMP=3"}) {
            System.out.println("String: " + str + " basic: " + tlp.basicCategory(str) + " basicAndFunc: " + tlp.categoryAndFunction(str));
        }
    }

    /**
     * Returns the basic category of a label. When this pack was constructed with the
     * {@code DET+NOUN} option enabled and the label starts with {@code DET+NOUN}, the
     * result is {@code "DET+NOUN"}; otherwise the superclass implementation decides.
     *
     * @param str the category label; {@code null} is passed straight to the superclass
     *            instead of failing in the pattern match
     * @return the basic category for {@code str}
     */
    @Override // edu.stanford.nlp.trees.AbstractTreebankLanguagePack, edu.stanford.nlp.trees.TreebankLanguagePack
    public String basicCategory(String str) {
        // Null guard keeps behavior for null input the same regardless of the flag.
        if (detPlusNounIsBasicCategory && str != null && detPlusNounPattern.matcher(str).find()) {
            return detPlusNoun;
        }
        return super.basicCategory(str);
    }

    /** @return the fixed name {@code "ArabicTreebankLanguagePack"} */
    @Override
    public String toString() {
        return "ArabicTreebankLanguagePack";
    }

    /** @return a new {@code ArabicHeadFinder} configured with this language pack */
    @Override // edu.stanford.nlp.trees.TreebankLanguagePack
    public HeadFinder headFinder() {
        return new ArabicHeadFinder(this);
    }
}
