package edu.northwestern.at.utils.corpuslinguistics.postagger;

import edu.northwestern.at.utils.IsCloseable;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MutableInteger;
import edu.northwestern.at.utils.corpuslinguistics.adornedword.AdornedWord;
import edu.northwestern.at.utils.corpuslinguistics.adornedword.BaseAdornedWord;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.DefaultSuffixLexicon;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.DefaultWordLexicon;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.LexiconFactory;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.UsesLexicon;
import edu.northwestern.at.utils.corpuslinguistics.postagger.guesser.DefaultPartOfSpeechGuesser;
import edu.northwestern.at.utils.corpuslinguistics.postagger.guesser.PartOfSpeechGuesser;
import edu.northwestern.at.utils.corpuslinguistics.postagger.smoothing.contextual.ContextualSmoother;
import edu.northwestern.at.utils.corpuslinguistics.postagger.smoothing.lexical.LexicalSmoother;
import edu.northwestern.at.utils.corpuslinguistics.postagger.transitionmatrix.TransitionMatrix;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.PostTokenizer;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.PostTokenizerFactory;
import edu.northwestern.at.utils.logger.DummyLogger;
import edu.northwestern.at.utils.logger.Logger;
import edu.northwestern.at.utils.logger.UsesLogger;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/northwestern/at/utils/corpuslinguistics/postagger/AbstractPartOfSpeechTagger.class */
/**
 * Skeleton implementation of {@link PartOfSpeechTagger}.
 *
 * <p>Holds the state shared by concrete taggers (lexicons, rule arrays, transition matrix,
 * retagger, guesser, post tokenizer, logger) and implements the bookkeeping methods of the
 * interface. Subclasses supply the actual tagging algorithm by implementing
 * {@link #tagAdornedWordList(List)}.
 *
 * <p>Words that are not found in the main lexicon are looked up in a second, "dynamic"
 * lexicon; when they are missing there as well, the part of speech guesser is consulted and
 * its guesses are recorded in the dynamic lexicon.
 */
public abstract class AbstractPartOfSpeechTagger extends IsCloseableObject implements PartOfSpeechTagger, IsCloseable, UsesLexicon, UsesLogger {
    /** Main word lexicon; consulted first on every lookup. */
    protected Lexicon lexicon;

    /** Lexicon that records guessed tags for words absent from {@link #lexicon}. */
    protected Lexicon dynamicLexicon;

    /** Tag transition matrix; only stored and returned by this base class. */
    protected TransitionMatrix transitionMatrix;

    /** Context rules as last passed to {@link #setContextRules(String[])}. */
    protected String[] contextRules;

    /** Lexical rules as last passed to {@link #setLexicalRules(String[])}. */
    protected String[] lexicalRules;

    /** Lexical smoother; not used by this base class, available to subclasses. */
    protected LexicalSmoother lexicalSmoother;

    /** Contextual smoother; not used by this base class, available to subclasses. */
    protected ContextualSmoother contextualSmoother;

    /** Optional retagger applied after tagging; {@code null} means no retagging. */
    protected PartOfSpeechRetagger retagger;

    /** Guesser for words in neither lexicon; created on demand when {@code null}. */
    protected PartOfSpeechGuesser partOfSpeechGuesser;

    /** Post tokenizer used to derive spelling and standard spelling from a token. */
    protected PostTokenizer postTokenizer;

    /** Running count of rule corrections. */
    protected int ruleCorrections = 0;

    /** Logger; a {@link DummyLogger} until {@link #setLogger(Logger)} is called. */
    protected Logger logger;

    /**
     * Creates a tagger with two lexicons from a {@link LexiconFactory}, a post tokenizer from a
     * {@link PostTokenizerFactory}, and a {@link DummyLogger}.
     */
    public AbstractPartOfSpeechTagger() {
        LexiconFactory lexiconFactory = new LexiconFactory();
        this.lexicon = lexiconFactory.newLexicon();
        this.dynamicLexicon = lexiconFactory.newLexicon();
        this.postTokenizer = new PostTokenizerFactory().newPostTokenizer();
        this.logger = new DummyLogger();
    }

    /**
     * Returns the logger in use.
     *
     * @return the current logger
     */
    @Override
    public Logger getLogger() {
        return this.logger;
    }

    /**
     * Replaces the logger in use.
     *
     * @param logger the new logger
     */
    @Override
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /**
     * Reports whether this tagger uses context rules.
     *
     * @return {@code false} in this base class
     */
    @Override
    public boolean usesContextRules() {
        return false;
    }

    /**
     * Reports whether this tagger uses lexical rules.
     *
     * @return {@code false} in this base class
     */
    @Override
    public boolean usesLexicalRules() {
        return false;
    }

    /**
     * Reports whether this tagger uses transition probabilities.
     *
     * @return {@code false} in this base class
     */
    @Override
    public boolean usesTransitionProbabilities() {
        return false;
    }

    /**
     * Stores the context rules and forwards them to the retagger when one is set.
     *
     * @param strArr the context rules
     * @throws InvalidRuleException declared by the interface; may be raised by the retagger
     */
    @Override
    public void setContextRules(String[] strArr) throws InvalidRuleException {
        this.contextRules = strArr;
        if (this.retagger != null) {
            this.retagger.setContextRules(strArr);
        }
    }

    /**
     * Stores the lexical rules and forwards them to the retagger when one is set.
     *
     * @param strArr the lexical rules
     * @throws InvalidRuleException declared by the interface; may be raised by the retagger
     */
    @Override
    public void setLexicalRules(String[] strArr) throws InvalidRuleException {
        this.lexicalRules = strArr;
        if (this.retagger != null) {
            this.retagger.setLexicalRules(strArr);
        }
    }

    /**
     * Returns the main lexicon.
     *
     * @return the main lexicon
     */
    @Override
    public Lexicon getLexicon() {
        return this.lexicon;
    }

    /**
     * Returns the dynamic lexicon that holds guessed entries.
     *
     * @return the dynamic lexicon
     */
    public Lexicon getDynamicLexicon() {
        return this.dynamicLexicon;
    }

    /**
     * Returns the lexicon associated with a word.
     *
     * @param str the word
     * @return the guesser's cached lexicon for the word when a guesser is set; otherwise the
     *     main lexicon
     */
    @Override
    public Lexicon getLexicon(String str) {
        if (this.partOfSpeechGuesser == null) {
            return this.lexicon;
        }
        return this.partOfSpeechGuesser.getCachedLexiconForWord(str);
    }

    /**
     * Replaces the main lexicon and passes it on to the retagger when one is set.
     *
     * @param lexicon the new main lexicon
     */
    @Override
    public void setLexicon(Lexicon lexicon) {
        this.lexicon = lexicon;
        if (this.retagger != null) {
            this.retagger.setLexicon(this.lexicon);
        }
    }

    /**
     * Returns the transition matrix.
     *
     * @return the transition matrix, possibly {@code null} if none was set
     */
    @Override
    public TransitionMatrix getTransitionMatrix() {
        return this.transitionMatrix;
    }

    /**
     * Sets the transition matrix.
     *
     * @param transitionMatrix the new transition matrix
     */
    @Override
    public void setTransitionMatrix(TransitionMatrix transitionMatrix) {
        this.transitionMatrix = transitionMatrix;
    }

    /**
     * Returns the part of speech guesser.
     *
     * @return the guesser, or {@code null} if none has been set or created yet
     */
    @Override
    public PartOfSpeechGuesser getPartOfSpeechGuesser() {
        return this.partOfSpeechGuesser;
    }

    /**
     * Sets the part of speech guesser.
     *
     * @param partOfSpeechGuesser the new guesser
     */
    @Override
    public void setPartOfSpeechGuesser(PartOfSpeechGuesser partOfSpeechGuesser) {
        this.partOfSpeechGuesser = partOfSpeechGuesser;
    }

    /**
     * Returns the retagger.
     *
     * @return the retagger, or {@code null} if none is set
     */
    @Override
    public PartOfSpeechRetagger getRetagger() {
        return this.retagger;
    }

    /**
     * Sets the retagger.
     *
     * @param partOfSpeechRetagger the new retagger; {@code null} disables retagging
     */
    @Override
    public void setRetagger(PartOfSpeechRetagger partOfSpeechRetagger) {
        this.retagger = partOfSpeechRetagger;
    }

    /**
     * Returns the candidate tags for a word.
     *
     * <p>The main lexicon is consulted first, then the dynamic lexicon. When neither contains
     * the word, the guesser (created on demand) supplies tags and each guess is recorded in the
     * dynamic lexicon with its count.
     *
     * @param str the word
     * @return a new list holding the candidate tags
     * @throws IllegalStateException if a guesser is needed but none is set and the default one
     *     could not be created
     */
    @Override
    public List<String> getTagsForWord(String str) {
        Set<String> tags;
        if (this.lexicon.containsEntry(str)) {
            tags = this.lexicon.getCategoriesForEntry(str);
        } else if (this.dynamicLexicon.containsEntry(str)) {
            tags = this.dynamicLexicon.getCategoriesForEntry(str);
        } else {
            if (this.partOfSpeechGuesser == null) {
                createPartOfSpeechGuesser();
            }
            // Creation can fail; report that clearly rather than with a bare NullPointerException.
            if (this.partOfSpeechGuesser == null) {
                throw new IllegalStateException(
                        "No part of speech guesser is available to tag unknown word: " + str);
            }
            Map<String, MutableInteger> guesses = this.partOfSpeechGuesser.guessPartsOfSpeech(str);
            tags = guesses.keySet();
            for (Map.Entry<String, MutableInteger> guess : guesses.entrySet()) {
                // "*" is passed as the third argument; presumably a placeholder lemma —
                // confirm against Lexicon.updateEntryCount.
                this.dynamicLexicon.updateEntryCount(str, guess.getKey(), "*", guess.getValue().intValue());
            }
        }
        return ListFactory.createNewList(tags);
    }

    /**
     * Returns how often a word carries a given tag.
     *
     * <p>Uses the main lexicon when it contains the word, otherwise the dynamic lexicon; a word
     * in neither is first run through {@link #getTagsForWord(String)} so that guessed counts are
     * available in the dynamic lexicon.
     *
     * @param str the word
     * @param str2 the tag
     * @return the count, never less than 1
     */
    @Override
    public int getTagCount(String str, String str2) {
        int count;
        if (this.lexicon.containsEntry(str)) {
            count = this.lexicon.getCategoryCount(str, str2);
        } else {
            if (!this.dynamicLexicon.containsEntry(str)) {
                // Populates the dynamic lexicon with guessed tags for this word.
                getTagsForWord(str);
            }
            count = this.dynamicLexicon.getCategoryCount(str, str2);
        }
        return Math.max(count, 1);
    }

    /**
     * Returns the largest category (tag) recorded for a word.
     *
     * <p>Uses the main lexicon when it contains the word, otherwise the dynamic lexicon; a word
     * in neither is first run through {@link #getTagsForWord(String)}.
     *
     * @param str the word
     * @return the largest category as reported by the lexicon that was consulted
     */
    public String getMostCommonTag(String str) {
        if (this.lexicon.containsEntry(str)) {
            return this.lexicon.getLargestCategory(str);
        }
        if (!this.dynamicLexicon.containsEntry(str)) {
            // Populates the dynamic lexicon with guessed tags for this word.
            getTagsForWord(str);
        }
        return this.dynamicLexicon.getLargestCategory(str);
    }

    /**
     * Tags and then retags each sentence in a list of tokenized sentences.
     *
     * @param list the sentences, each a list of token strings
     * @return a new list with one tagged sentence per input sentence
     */
    @Override
    public List<List<AdornedWord>> tagSentences(List<List<String>> list) {
        List<List<AdornedWord>> taggedSentences = ListFactory.createNewList();
        for (List<String> sentence : list) {
            taggedSentences.add(retagWords(tagSentence(sentence)));
        }
        return taggedSentences;
    }

    /**
     * Tags and then retags each sentence of adorned words.
     *
     * <p>The value returned by {@link #retagWords(List)} is not stored; the input list itself is
     * returned.
     *
     * @param list the sentences to tag
     * @param <T> the adorned word type
     * @return the same list instance that was passed in
     */
    @Override
    public <T extends AdornedWord> List<List<T>> tagAdornedWordSentences(List<List<T>> list) {
        for (List<T> sentence : list) {
            retagWords(tagAdornedWordSentence(sentence));
        }
        return list;
    }

    /**
     * Runs the retagger over a tagged sentence.
     *
     * @param list the tagged sentence
     * @param <T> the adorned word type
     * @return the retagger's result, or the input list unchanged when no retagger is set
     */
    @Override
    public <T extends AdornedWord> List<T> retagWords(List<T> list) {
        if (this.retagger == null) {
            return list;
        }
        return this.retagger.retagSentence(list);
    }

    /** Resets the rule correction counter to zero. */
    @Override
    public void clearRuleCorrections() {
        this.ruleCorrections = 0;
    }

    /** Adds one to the rule correction counter. */
    @Override
    public void incrementRuleCorrections() {
        this.ruleCorrections++;
    }

    /**
     * Returns the rule correction counter.
     *
     * @return the number of rule corrections counted so far
     */
    @Override
    public int getRuleCorrections() {
        return this.ruleCorrections;
    }

    /**
     * Creates a {@link DefaultPartOfSpeechGuesser} when no guesser is set.
     *
     * <p>The new guesser receives the main lexicon (a {@link DefaultWordLexicon} is installed
     * first if the main lexicon is {@code null}), a {@link DefaultSuffixLexicon}, and the current
     * logger. Failures do not propagate: they are reported through the logger and the guesser is
     * left unset.
     */
    protected void createPartOfSpeechGuesser() {
        if (this.partOfSpeechGuesser != null) {
            return;
        }
        try {
            DefaultPartOfSpeechGuesser guesser = new DefaultPartOfSpeechGuesser();
            if (this.lexicon == null) {
                setLexicon(new DefaultWordLexicon());
            }
            guesser.setWordLexicon(this.lexicon);
            guesser.setSuffixLexicon(new DefaultSuffixLexicon());
            guesser.setLogger(this.logger);
            setPartOfSpeechGuesser(guesser);
        } catch (Exception e) {
            // Stay best-effort as before, but leave a trace of what went wrong.
            if (this.logger != null) {
                this.logger.logError("Unable to create default part of speech guesser: " + e);
            }
        }
    }

    /**
     * Tags one sentence given as token strings.
     *
     * <p>Each token is wrapped in a {@link BaseAdornedWord} whose spelling and standard spelling
     * come from the post tokenizer (or are the token itself when no post tokenizer is set); the
     * resulting list is then passed to {@link #tagAdornedWordList(List)}.
     *
     * @param list the tokens of the sentence
     * @return a new list of adorned words, one per token
     */
    @Override
    public List<AdornedWord> tagSentence(List<String> list) {
        List<AdornedWord> words = ListFactory.createNewList();
        for (String token : list) {
            BaseAdornedWord word = new BaseAdornedWord(token);
            assignSpellings(word, token);
            words.add(word);
        }
        tagAdornedWordList(words);
        return words;
    }

    /**
     * Tags one sentence of adorned words in place.
     *
     * <p>Each word's spelling and standard spelling are refreshed from its token before the list
     * is passed to {@link #tagAdornedWordList(List)}.
     *
     * @param list the sentence to tag
     * @param <T> the adorned word type
     * @return the same list instance that was passed in
     */
    @Override
    public <T extends AdornedWord> List<T> tagAdornedWordSentence(List<T> list) {
        for (T word : list) {
            assignSpellings(word, word.getToken());
        }
        tagAdornedWordList(list);
        return list;
    }

    /**
     * Assigns tags to the words of a sentence; the tagging algorithm proper.
     *
     * @param list the sentence to tag
     * @param <T> the adorned word type
     * @return the tagged sentence
     */
    @Override
    public abstract <T extends AdornedWord> List<T> tagAdornedWordList(List<T> list);

    /**
     * Sets a word's spelling and standard spelling from a token: elements 0 and 1 of the post
     * tokenizer's result, or the token itself for both when no post tokenizer is set.
     */
    private void assignSpellings(AdornedWord word, String token) {
        String spelling = token;
        String standardSpelling = token;
        if (this.postTokenizer != null) {
            String[] spellings = this.postTokenizer.postTokenize(token);
            spelling = spellings[0];
            standardSpelling = spellings[1];
        }
        word.setSpelling(spelling);
        word.setStandardSpelling(standardSpelling);
    }
}
