package edu.northwestern.at.utils.corpuslinguistics.postagger.guesser;

import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.MutableInteger;
import edu.northwestern.at.utils.RomanNumeralUtils;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.TaggedStrings;
import edu.northwestern.at.utils.cache.Cache;
import edu.northwestern.at.utils.cache.LRUCache;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.utils.corpuslinguistics.namerecognizer.Names;
import edu.northwestern.at.utils.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.utils.corpuslinguistics.spellingstandardizer.ExtendedSimpleSpellingStandardizer;
import edu.northwestern.at.utils.corpuslinguistics.spellingstandardizer.SpellingStandardizer;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.Abbreviations;
import edu.northwestern.at.utils.logger.DummyLogger;
import edu.northwestern.at.utils.logger.Logger;
import edu.northwestern.at.utils.logger.UsesLogger;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/northwestern/at/utils/corpuslinguistics/postagger/guesser/AbstractPartOfSpeechGuesser.class */
/**
 * Base class for part of speech guessers.
 *
 * <p>Provides the shared state (word and suffix lexicons, spelling
 * standardizers, auxiliary word lists, a name list and a cache of already
 * guessed words) together with a collection of {@code checkXxx} helpers.
 * Each helper inspects a word and returns a map from part of speech tag to
 * count, or {@code null} when the helper does not recognize the word.
 * Subclasses combine the helpers in {@link #guessPartsOfSpeech(String)}.</p>
 *
 * <p>Most helpers dereference {@link #wordLexicon} (and
 * {@link #checkSuffixes(String)} dereferences {@link #suffixLexicon}), so
 * the lexicons must be set before the helpers are used.</p>
 */
public abstract class AbstractPartOfSpeechGuesser extends IsCloseableObject implements PartOfSpeechGuesser, UsesLogger {
    /** A capital letter, a long dash and a possessive ending, e.g. {@code B--'s}. */
    private static final Pattern CAPITAL_DASH_POSSESSIVE = Pattern.compile("[A-Z](--|---)'(s|S)");

    /** A lower case letter, a long dash and a possessive ending, e.g. {@code b--'s}. */
    private static final Pattern LOWER_DASH_POSSESSIVE = Pattern.compile("[a-z](--|---)'(s|S)");

    /** A bare long dash followed by a possessive ending, e.g. {@code --'s}. */
    private static final Pattern DASH_POSSESSIVE = Pattern.compile("(--|---)'(s|S)");

    /** A capital letter followed by a long dash, e.g. {@code B--}. */
    private static final Pattern CAPITAL_DASH = Pattern.compile("[A-Z](--|---)");

    /** A lower case letter followed by a long dash, e.g. {@code b--}. */
    private static final Pattern LOWER_DASH = Pattern.compile("[a-z](--|---)");

    /** Lexicon of known words; supplies tag counts and the tag set. */
    protected Lexicon wordLexicon;

    /** Lexicon of word endings consulted by {@link #checkSuffixes(String)}. */
    protected Lexicon suffixLexicon;

    /** Primary spelling standardizer; may be {@code null} until set. */
    protected SpellingStandardizer spellingStandardizer;

    /** Debug flag; not read anywhere in this class. */
    protected boolean debug = true;

    /** Logger; defaults to a logger of type {@link DummyLogger}. */
    protected Logger logger = new DummyLogger();

    /** Cache of tag count maps for words that were already guessed (raw construction kept as in the original). */
    protected Cache<String, Map<String, MutableInteger>> cachedWords = new LRUCache(2000);

    /** Records which lexicon produced the cached guess for a word. */
    protected Map<String, Lexicon> cachedLexicons = MapFactory.createNewMap();

    /** Standardizer used by {@link #getStandardizedSpellings(String)} when standard spellings are disabled. */
    protected SpellingStandardizer auxiliarySpellingStandardizer = new ExtendedSimpleSpellingStandardizer();

    /** Extra tagged word lists consulted by {@link #checkAuxiliaryWordLists(String)}. */
    protected List<TaggedStrings> auxiliaryWordLists = ListFactory.createNewList();

    /** Name and place list used to recognize proper nouns. */
    protected Names names = new Names();

    /** Selects the primary ({@code true}) or auxiliary ({@code false}) spelling standardizer. */
    protected boolean tryStandardSpellings = true;

    /** Enables {@link #checkPossessiveNoun(String)}. */
    protected boolean checkPossessives = false;

    /**
     * Returns the logger.
     *
     * @return the current logger
     */
    @Override
    public Logger getLogger() {
        return this.logger;
    }

    /**
     * Sets the logger.
     *
     * @param logger the logger to use
     */
    @Override
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /**
     * Returns the primary spelling standardizer.
     *
     * @return the spelling standardizer, possibly {@code null}
     */
    @Override
    public SpellingStandardizer getSpellingStandardizer() {
        return this.spellingStandardizer;
    }

    /**
     * Sets the primary spelling standardizer and copies its mapped spellings
     * into the auxiliary standardizer.
     *
     * @param spellingStandardizer the standardizer to use; when {@code null}
     *        the auxiliary standardizer is left untouched
     */
    @Override
    public void setSpellingStandardizer(SpellingStandardizer spellingStandardizer) {
        this.spellingStandardizer = spellingStandardizer;
        // Guard against null so that clearing the standardizer does not throw.
        if (spellingStandardizer != null && this.auxiliarySpellingStandardizer != null) {
            this.auxiliarySpellingStandardizer.setMappedSpellings(spellingStandardizer.getMappedSpellings());
        }
    }

    /**
     * Returns the word lexicon.
     *
     * @return the word lexicon
     */
    @Override
    public Lexicon getWordLexicon() {
        return this.wordLexicon;
    }

    /**
     * Sets the word lexicon.
     *
     * @param lexicon the word lexicon
     */
    @Override
    public void setWordLexicon(Lexicon lexicon) {
        this.wordLexicon = lexicon;
    }

    /**
     * Returns the suffix lexicon.
     *
     * @return the suffix lexicon
     */
    @Override
    public Lexicon getSuffixLexicon() {
        return this.suffixLexicon;
    }

    /**
     * Sets the suffix lexicon.
     *
     * @param lexicon the suffix lexicon
     */
    @Override
    public void setSuffixLexicon(Lexicon lexicon) {
        this.suffixLexicon = lexicon;
    }

    /**
     * Stores a guessed tag count map in the word cache.
     *
     * @param str the word
     * @param map the tag counts guessed for the word
     */
    protected void addCachedWord(String str, Map<String, MutableInteger> map) {
        this.cachedWords.put(str, map);
    }

    /**
     * Builds a single entry tag count map.
     *
     * @param str the part of speech tag
     * @return a new map from the tag to its category count in the word
     *         lexicon, with a minimum count of one
     */
    protected Map<String, MutableInteger> posTagToMap(String str) {
        Map<String, MutableInteger> tagCounts = MapFactory.createNewMap();
        tagCounts.put(str, new MutableInteger(Math.max(1, this.wordLexicon.getCategoryCount(str))));
        return tagCounts;
    }

    /**
     * Builds a tag count map for several tags.
     *
     * @param strArr the part of speech tags
     * @return a new map from each tag to its category count in the word
     *         lexicon, with a minimum count of one
     */
    protected Map<String, MutableInteger> posTagsToMap(String[] strArr) {
        Map<String, MutableInteger> tagCounts = MapFactory.createNewMap();
        for (String tag : strArr) {
            tagCounts.put(tag, new MutableInteger(Math.max(1, this.wordLexicon.getCategoryCount(tag))));
        }
        return tagCounts;
    }

    /**
     * Copies a tag count map so the copy can be modified independently.
     *
     * @param map the map to copy; may be {@code null}
     * @return a new map holding new counter objects, or {@code null} when
     *         {@code map} is {@code null}
     */
    public Map<String, MutableInteger> clonePosTagMap(Map<String, MutableInteger> map) {
        if (map == null) {
            return null;
        }
        Map<String, MutableInteger> copy = MapFactory.createNewMap(map.size());
        for (Map.Entry<String, MutableInteger> entry : map.entrySet()) {
            // Keys are immutable strings and can be shared; counters are mutable and must be copied.
            copy.put(entry.getKey(), new MutableInteger(entry.getValue().intValue()));
        }
        return copy;
    }

    /**
     * Registers an auxiliary word list. Null and empty lists are ignored.
     *
     * @param taggedStrings the word list to add
     */
    @Override
    public void addAuxiliaryWordList(TaggedStrings taggedStrings) {
        if (taggedStrings != null && taggedStrings.getStringCount() > 0) {
            this.auxiliaryWordLists.add(taggedStrings);
        }
    }

    /**
     * Returns the registered auxiliary word lists.
     *
     * @return the live list of auxiliary word lists
     */
    @Override
    public List<TaggedStrings> getAuxiliaryWordLists() {
        return this.auxiliaryWordLists;
    }

    /**
     * Returns the lexicon recorded for a cached word.
     *
     * @param str the word
     * @return the lexicon recorded for the word, or the word lexicon when
     *         none was recorded
     */
    @Override
    public Lexicon getCachedLexiconForWord(String str) {
        Lexicon recorded = this.cachedLexicons.get(str);
        return recorded != null ? recorded : this.wordLexicon;
    }

    /**
     * Looks a word up in the cache, first as given and then in lower case.
     *
     * @param str the word
     * @return the cached tag counts, or {@code null} when the word is not cached
     */
    public Map<String, MutableInteger> checkCachedWord(String str) {
        if (this.cachedWords.containsKey(str)) {
            return this.cachedWords.get(str);
        }
        String lowerCased = str.toLowerCase();
        if (this.cachedWords.containsKey(lowerCased)) {
            return this.cachedWords.get(lowerCased);
        }
        return null;
    }

    /**
     * Checks whether a word is a known name or place. An all caps word is
     * also checked with only its first letter capitalized.
     *
     * @param str the word
     * @return a map holding the singular proper noun tag (also cached under
     *         {@code str}), or {@code null} when the word is not a name
     */
    public Map<String, MutableInteger> checkName(String str) {
        boolean isName = this.names.isNameOrPlace(str)
                || (CharUtils.isAllCaps(str) && this.names.isNameOrPlace(CharUtils.capitalizeFirstLetter(str)));
        if (!isName) {
            return null;
        }
        Map<String, MutableInteger> tagCounts =
                posTagToMap(this.wordLexicon.getPartOfSpeechTags().getSingularProperNounTag());
        addCachedWord(str, tagCounts);
        return tagCounts;
    }

    /**
     * Checks for a possessive noun ending in {@code 's}. Only active when
     * possessive checking is enabled.
     *
     * @param str the word
     * @return a map holding the possessive proper noun tag when the base
     *         word is a known name or starts with a capital letter, the
     *         possessive common noun tag otherwise (both cached under
     *         {@code str}), or {@code null} when the check is disabled, the
     *         word has no possessive ending, or nothing precedes the ending
     */
    public Map<String, MutableInteger> checkPossessiveNoun(String str) {
        if (!this.checkPossessives || !CharUtils.endsWithSingleQuoteS(str)) {
            return null;
        }
        // A bare possessive ending has no base word to classify; previously this threw on charAt(0).
        if (str.length() <= 2) {
            return null;
        }
        String base = str.substring(0, str.length() - 2);
        PartOfSpeechTags tags = this.wordLexicon.getPartOfSpeechTags();
        String tag;
        if (this.names.isNameOrPlace(base) || Character.isUpperCase(base.charAt(0))) {
            tag = tags.getPossessiveSingularProperNounTag();
        } else {
            tag = tags.getPossessiveSingularNounTag();
        }
        Map<String, MutableInteger> tagCounts = posTagToMap(tag);
        if (tagCounts != null) {
            addCachedWord(str, tagCounts);
        }
        return tagCounts;
    }

    /**
     * Treats a word whose letters are all capitals as a proper noun.
     *
     * @param str the word
     * @return a map holding the singular proper noun tag (also cached), or
     *         {@code null} when the word does not qualify
     */
    public Map<String, MutableInteger> checkAllUpperCase(String str) {
        if (!CharUtils.allLettersCapital(str)) {
            return null;
        }
        Map<String, MutableInteger> tagCounts =
                posTagToMap(this.wordLexicon.getPartOfSpeechTags().getSingularProperNounTag());
        addCachedWord(str, tagCounts);
        return tagCounts;
    }

    /**
     * Checks for a cardinal or ordinal number. Commas are ignored.
     *
     * @param str the word
     * @return a map holding the cardinal or ordinal number tag (also cached
     *         under {@code str}), or {@code null} when the word is neither
     */
    public Map<String, MutableInteger> checkNumber(String str) {
        String withoutCommas = str.replace(",", "");
        PartOfSpeechTags tags = this.wordLexicon.getPartOfSpeechTags();
        String tag;
        if (CharUtils.isNumber(withoutCommas)) {
            tag = tags.getCardinalNumberTag();
        } else if (CharUtils.isOrdinal(withoutCommas)) {
            tag = tags.getOrdinalNumberTag();
        } else {
            return null;
        }
        Map<String, MutableInteger> tagCounts = posTagToMap(tag);
        addCachedWord(str, tagCounts);
        return tagCounts;
    }

    /**
     * Checks for a currency amount.
     *
     * @param str the word
     * @return a map holding the cardinal number tag (also cached), or
     *         {@code null} when the word is not a currency amount
     */
    public Map<String, MutableInteger> checkCurrency(String str) {
        boolean isCurrency = CharUtils.isCurrency(str)
                || CharUtils.isUSCurrency(str)
                || CharUtils.isUSCurrencyCents(str);
        if (!isCurrency) {
            return null;
        }
        Map<String, MutableInteger> tagCounts =
                posTagToMap(this.wordLexicon.getPartOfSpeechTags().getCardinalNumberTag());
        addCachedWord(str, tagCounts);
        return tagCounts;
    }

    /**
     * Checks for a Roman numeral or ordinal Roman numeral.
     *
     * @param str the word
     * @return for a Roman numeral, a map holding the cardinal number tag,
     *         plus the singular proper noun tag when the word is also an
     *         initial; for an ordinal Roman numeral, a map holding the
     *         ordinal number tag; {@code null} otherwise. Results are cached.
     */
    public Map<String, MutableInteger> checkRomanNumeral(String str) {
        PartOfSpeechTags tags = this.wordLexicon.getPartOfSpeechTags();
        Map<String, MutableInteger> tagCounts;
        if (RomanNumeralUtils.isLooseRomanNumeral(str)) {
            if (Abbreviations.isInitial(str)) {
                tagCounts = posTagsToMap(
                        new String[] {tags.getCardinalNumberTag(), tags.getSingularProperNounTag()});
            } else {
                tagCounts = posTagToMap(tags.getCardinalNumberTag());
            }
        } else if (RomanNumeralUtils.isLooseOrdinalRomanNumeral(str)) {
            tagCounts = posTagToMap(tags.getOrdinalNumberTag());
        } else {
            return null;
        }
        addCachedWord(str, tagCounts);
        return tagCounts;
    }

    /**
     * Looks a word up in the auxiliary word lists. Each list is tried with
     * the word as given and then in lower case before moving to the next
     * list. The result is not added to the word cache.
     *
     * @param str the word
     * @return a map built from the tags of the first matching list entry,
     *         or {@code null} when no list contains the word
     */
    public Map<String, MutableInteger> checkAuxiliaryWordLists(String str) {
        if (this.auxiliaryWordLists.isEmpty()) {
            return null;
        }
        String lowerCased = str.toLowerCase();
        for (TaggedStrings wordList : this.auxiliaryWordLists) {
            if (wordList.containsString(str)) {
                return posTagsToMap(StringUtils.makeTokenArray(wordList.getTag(str)));
            }
            if (wordList.containsString(lowerCased)) {
                return posTagsToMap(StringUtils.makeTokenArray(wordList.getTag(lowerCased)));
            }
        }
        return null;
    }

    /**
     * Checks for punctuation; the punctuation string is used as its own tag.
     *
     * @param str the word
     * @return a map holding {@code str} as tag (also cached), or
     *         {@code null} when the word is not punctuation
     */
    public Map<String, MutableInteger> checkPunctuation(String str) {
        if (!CharUtils.isPunctuation(str)) {
            return null;
        }
        Map<String, MutableInteger> tagCounts = posTagToMap(str);
        addCachedWord(str, tagCounts);
        return tagCounts;
    }

    /**
     * Checks for a symbol.
     *
     * @param str the word
     * @return a map holding the symbol tag (also cached), or {@code null}
     *         when the word is not a symbol
     */
    public Map<String, MutableInteger> checkSymbol(String str) {
        if (!CharUtils.isSymbol(str)) {
            return null;
        }
        Map<String, MutableInteger> tagCounts =
                posTagToMap(this.wordLexicon.getPartOfSpeechTags().getSymbolTag());
        addCachedWord(str, tagCounts);
        return tagCounts;
    }

    /**
     * Checks for a known abbreviation.
     *
     * @param str the word
     * @return a map holding the singular proper noun tag when the
     *         abbreviation starts with a capital letter and the singular
     *         noun tag otherwise (also cached), or {@code null} when the
     *         word is not an abbreviation
     */
    public Map<String, MutableInteger> checkAbbreviation(String str) {
        if (!Abbreviations.isAbbreviation(str)) {
            return null;
        }
        PartOfSpeechTags tags = this.wordLexicon.getPartOfSpeechTags();
        String tag = Character.isUpperCase(str.charAt(0))
                ? tags.getSingularProperNounTag()
                : tags.getSingularNounTag();
        Map<String, MutableInteger> tagCounts = posTagToMap(tag);
        addCachedWord(str, tagCounts);
        return tagCounts;
    }

    /**
     * Guesses tags for a word containing a hyphen.
     *
     * <p>Words consisting of an optional single letter, a double or triple
     * dash and an optional possessive ending receive fixed noun, proper
     * noun or interjection tags. Any other word with text after its last
     * hyphen receives a copy of the word lexicon's tag counts for that
     * trailing text, tried as given and then in lower case.</p>
     *
     * @param str the word
     * @return the guessed tag counts (also cached under {@code str}), or
     *         {@code null} when the word has no hyphen or no guess was found
     */
    public Map<String, MutableInteger> checkHyphenatedWord(String str) {
        int lastHyphen = str.lastIndexOf('-');
        if (lastHyphen < 0) {
            return null;
        }
        PartOfSpeechTags tags = this.wordLexicon.getPartOfSpeechTags();
        Map<String, MutableInteger> tagCounts = null;
        if (CAPITAL_DASH_POSSESSIVE.matcher(str).matches()) {
            tagCounts = posTagsToMap(new String[] {tags.getPossessiveSingularProperNounTag()});
        } else if (LOWER_DASH_POSSESSIVE.matcher(str).matches()) {
            tagCounts = posTagsToMap(new String[] {tags.getPossessiveSingularNounTag()});
        } else if (DASH_POSSESSIVE.matcher(str).matches()) {
            tagCounts = posTagsToMap(new String[] {
                tags.getPossessiveSingularNounTag(), tags.getPossessiveSingularProperNounTag()});
        } else if (CAPITAL_DASH.matcher(str).matches()) {
            tagCounts = posTagsToMap(new String[] {tags.getSingularProperNounTag(), tags.getInterjectionTag()});
        } else if (LOWER_DASH.matcher(str).matches()) {
            tagCounts = posTagsToMap(new String[] {tags.getSingularNounTag(), tags.getInterjectionTag()});
        } else if (lastHyphen < str.length() - 1) {
            String trailingPart = str.substring(lastHyphen + 1);
            // Clone in both lookups so the cached and returned map never aliases
            // a map handed out by the lexicon (matches the other lexicon lookups here).
            tagCounts = clonePosTagMap(this.wordLexicon.getCategoryCountsForEntry(trailingPart));
            if (tagCounts == null) {
                tagCounts = clonePosTagMap(
                        this.wordLexicon.getCategoryCountsForEntry(trailingPart.toLowerCase()));
            }
        }
        if (tagCounts != null) {
            addCachedWord(str, tagCounts);
        }
        return tagCounts;
    }

    /**
     * Returns standardized spellings for a word, retrying with the lower
     * cased word when the first attempt yields nothing. The primary
     * standardizer is used when standard spellings are enabled, the
     * auxiliary standardizer otherwise.
     *
     * @param str the word
     * @return the standardized spellings, or {@code null} when the selected
     *         standardizer is {@code null}
     */
    public String[] getStandardizedSpellings(String str) {
        SpellingStandardizer standardizer =
                this.tryStandardSpellings ? this.spellingStandardizer : this.auxiliarySpellingStandardizer;
        if (standardizer == null) {
            return null;
        }
        String[] spellings = standardizer.standardizeSpelling(str);
        if (spellings == null || spellings.length == 0) {
            spellings = standardizer.standardizeSpelling(str.toLowerCase());
        }
        return spellings;
    }

    /**
     * Guesses tags for a word from its first standardized spelling.
     *
     * <p>When the word lexicon contains the spelling, a copy of its tag
     * counts is cached under {@code str} and returned. Otherwise the
     * spelling is passed to {@link #checkName(String)}, which caches any
     * hit under the spelling itself.</p>
     *
     * @param str the original word
     * @param strArr the standardized spellings; may be {@code null} or empty
     * @return the guessed tag counts, or {@code null} when there is no
     *         primary standardizer, no spelling, or no guess
     */
    public Map<String, MutableInteger> checkStandardSpellings(String str, String[] strArr) {
        if (this.spellingStandardizer == null || strArr == null || strArr.length == 0) {
            return null;
        }
        String standardSpelling = strArr[0];
        if (this.wordLexicon.getEntryCount(standardSpelling) > 0) {
            Map<String, MutableInteger> tagCounts =
                    clonePosTagMap(this.wordLexicon.getCategoryCountsForEntry(standardSpelling));
            addCachedWord(str, tagCounts);
            return tagCounts;
        }
        return checkName(standardSpelling);
    }

    /**
     * Removes proper noun and proper adjective tags from a tag count map in
     * place.
     *
     * @param map the map to filter; {@code null} is ignored
     */
    public void removeProperNounTags(Map<String, MutableInteger> map) {
        if (map == null) {
            return;
        }
        PartOfSpeechTags tags = this.wordLexicon.getPartOfSpeechTags();
        for (Iterator<String> it = map.keySet().iterator(); it.hasNext();) {
            String tag = it.next();
            if (tags.isProperNounTag(tag) || tags.isProperAdjectiveTag(tag)) {
                it.remove();
            }
        }
    }

    /**
     * Removes compound tags from a tag count map in place.
     *
     * @param map the map to filter; {@code null} is ignored
     */
    public void removeCompoundTags(Map<String, MutableInteger> map) {
        if (map == null) {
            return;
        }
        PartOfSpeechTags tags = this.wordLexicon.getPartOfSpeechTags();
        for (Iterator<String> it = map.keySet().iterator(); it.hasNext();) {
            if (tags.isCompoundTag(it.next())) {
                it.remove();
            }
        }
    }

    /**
     * Guesses tags from the longest suffix of a word found in the suffix
     * lexicon.
     *
     * <p>Suffix lengths are tried from longest to shortest. Proper noun
     * tags are dropped when the word has no capital letter and compound
     * tags are dropped when it has no apostrophe. The first suffix that
     * still has tags left wins; the result is cached and the suffix lexicon
     * is recorded as the source for the word.</p>
     *
     * @param str the word
     * @return a copy of the filtered tag counts for the matching suffix, or
     *         {@code null} when no suffix yields any tag
     */
    public Map<String, MutableInteger> checkSuffixes(String str) {
        int wordLength = str.length();
        int longest = Math.min(this.suffixLexicon.getLongestEntryLength(), wordLength);
        // Never go below zero: a negative suffix length would make substring() throw.
        int shortest = Math.max(this.suffixLexicon.getShortestEntryLength(), 0);
        boolean hasCapital = CharUtils.hasCapitalLetter(str);
        boolean hasApostrophe = CharUtils.hasApostrophe(str);
        for (int suffixLength = longest; suffixLength >= shortest; suffixLength--) {
            String suffix = str.substring(wordLength - suffixLength);
            if (this.suffixLexicon.getEntryCount(suffix) <= 0) {
                continue;
            }
            Map<String, MutableInteger> tagCounts =
                    clonePosTagMap(this.suffixLexicon.getCategoryCountsForEntry(suffix));
            if (tagCounts == null) {
                // Entry is counted but has no tag map; try a shorter suffix.
                continue;
            }
            if (!hasCapital) {
                removeProperNounTags(tagCounts);
            }
            if (!hasApostrophe) {
                removeCompoundTags(tagCounts);
            }
            if (!tagCounts.isEmpty()) {
                addCachedWord(str, tagCounts);
                this.cachedLexicons.put(str, this.suffixLexicon);
                return tagCounts;
            }
        }
        return null;
    }

    /**
     * Guesses tags from suffixes of a word and of its standardized
     * spellings.
     *
     * <p>The spellings are only used when standard spellings are enabled
     * and at least one spelling is supplied. They are tried before the
     * original word when the word contains an apostrophe but does not end
     * in {@code 's}; otherwise the original word is tried first.</p>
     *
     * @param str the word
     * @param strArr the standardized spellings; may be {@code null} or empty
     * @return the guessed tag counts, or {@code null} when nothing matched
     */
    public Map<String, MutableInteger> checkSuffixes(String str, String[] strArr) {
        boolean useSpellings = this.tryStandardSpellings && strArr != null && strArr.length > 0;
        if (!useSpellings) {
            return checkSuffixes(str);
        }
        boolean spellingsFirst = CharUtils.hasApostrophe(str) && !str.endsWith("'s");
        Map<String, MutableInteger> tagCounts;
        if (spellingsFirst) {
            tagCounts = checkSuffixesOfSpellings(strArr);
            if (tagCounts == null) {
                tagCounts = checkSuffixes(str);
            }
        } else {
            tagCounts = checkSuffixes(str);
            if (tagCounts == null) {
                tagCounts = checkSuffixesOfSpellings(strArr);
            }
        }
        return tagCounts;
    }

    /**
     * Returns the first non-null suffix guess among the given spellings.
     *
     * @param spellings the spellings to try in order
     * @return the first guess found, or {@code null} when none matched
     */
    private Map<String, MutableInteger> checkSuffixesOfSpellings(String[] spellings) {
        for (String spelling : spellings) {
            Map<String, MutableInteger> tagCounts = checkSuffixes(spelling);
            if (tagCounts != null) {
                return tagCounts;
            }
        }
        return null;
    }

    /**
     * Guesses a noun tag from a word's first and last characters: proper
     * when the first character is upper case, plural when the last
     * character is {@code s} or {@code S}.
     *
     * @param str the word; must not be empty
     * @return a map holding the chosen noun tag (also cached)
     * @throws StringIndexOutOfBoundsException when {@code str} is empty
     */
    public Map<String, MutableInteger> getNoun(String str) {
        boolean proper = Character.isUpperCase(str.charAt(0));
        char lastChar = str.charAt(str.length() - 1);
        boolean plural = lastChar == 's' || lastChar == 'S';
        PartOfSpeechTags tags = this.wordLexicon.getPartOfSpeechTags();
        String tag;
        if (proper) {
            tag = plural ? tags.getPluralProperNounTag() : tags.getSingularProperNounTag();
        } else {
            tag = plural ? tags.getPluralNounTag() : tags.getSingularNounTag();
        }
        Map<String, MutableInteger> tagCounts = posTagToMap(tag);
        addCachedWord(str, tagCounts);
        return tagCounts;
    }

    /**
     * Guesses parts of speech for a word. This implementation ignores
     * {@code z} and delegates to {@link #guessPartsOfSpeech(String)}.
     *
     * @param str the word
     * @param z ignored by this implementation
     * @return the guessed tag counts
     */
    @Override
    public Map<String, MutableInteger> guessPartsOfSpeech(String str, boolean z) {
        return guessPartsOfSpeech(str);
    }

    /**
     * Selects which spelling standardizer is used for guessing.
     *
     * @param z {@code true} to use the primary standardizer, {@code false}
     *        to use the auxiliary standardizer
     */
    @Override
    public void setTryStandardSpellings(boolean z) {
        this.tryStandardSpellings = z;
    }

    /**
     * Enables or disables the possessive noun check.
     *
     * @param z {@code true} to enable {@link #checkPossessiveNoun(String)}
     */
    @Override
    public void setCheckPossessives(boolean z) {
        this.checkPossessives = z;
    }

    /**
     * Guesses parts of speech for a word.
     *
     * @param str the word
     * @return a map from part of speech tag to count
     */
    @Override
    public abstract Map<String, MutableInteger> guessPartsOfSpeech(String str);
}
