package edu.northwestern.at.utils.corpuslinguistics.namestandardizer;

import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.ScoredString;
import edu.northwestern.at.utils.SortedArrayList;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.TernaryTrie;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.DefaultLexicon;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.utils.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.utils.corpuslinguistics.stringsimilarity.LetterPairSimilarity;
import edu.northwestern.at.utils.logger.DummyLogger;
import edu.northwestern.at.utils.logger.Logger;
import edu.northwestern.at.utils.logger.UsesLogger;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;

/* loaded from: input_file:edu/northwestern/at/utils/corpuslinguistics/namestandardizer/AbstractNameStandardizer.class */
public abstract class AbstractNameStandardizer extends IsCloseableObject implements NameStandardizer, UsesLogger {
    protected static TernaryTrie nameTrie;
    protected static TernaryTrie consonantTrie;
    protected Logger logger = new DummyLogger();

    public AbstractNameStandardizer() {
        nameTrie = new TernaryTrie();
        consonantTrie = new TernaryTrie();
    }

    @Override // edu.northwestern.at.utils.corpuslinguistics.namestandardizer.NameStandardizer
    public void loadNames(String str) throws IOException {
        DefaultLexicon defaultLexicon = new DefaultLexicon();
        defaultLexicon.loadLexicon(new File(str).toURI().toURL(), "utf-8");
        loadNamesFromLexicon(defaultLexicon);
    }

    @Override // edu.northwestern.at.utils.corpuslinguistics.namestandardizer.NameStandardizer
    public void loadNamesFromLexicon(Lexicon lexicon) throws IOException {
        if (lexicon != null) {
            PartOfSpeechTags partOfSpeechTags = lexicon.getPartOfSpeechTags();
            String singularProperNounTag = partOfSpeechTags.getSingularProperNounTag();
            String pluralProperNounTag = partOfSpeechTags.getPluralProperNounTag();
            for (String str : lexicon.getEntries()) {
                Set<String> categoriesForEntry = lexicon.getCategoriesForEntry(str);
                if (categoriesForEntry.contains(singularProperNounTag) || categoriesForEntry.contains(pluralProperNounTag)) {
                    String lowerCase = str.toLowerCase();
                    nameTrie.put(lowerCase, lowerCase);
                    String stripChars = StringUtils.stripChars(lowerCase, "aeiouy");
                    Set set = (Set) consonantTrie.get(stripChars);
                    if (set == null) {
                        TreeSet treeSet = new TreeSet();
                        treeSet.add(lowerCase);
                        consonantTrie.put(stripChars, treeSet);
                    } else {
                        set.add(lowerCase);
                    }
                }
            }
        }
    }

    @Override // edu.northwestern.at.utils.corpuslinguistics.namestandardizer.NameStandardizer
    public int getNumberOfNames() {
        return nameTrie.size();
    }

    @Override // edu.northwestern.at.utils.corpuslinguistics.namestandardizer.NameStandardizer
    public boolean dontStandardize(String str) {
        return str.indexOf(".") >= 0;
    }

    /* JADX WARN: Multi-variable type inference failed */
    @Override // edu.northwestern.at.utils.corpuslinguistics.namestandardizer.NameStandardizer
    public String standardizeProperName(String str) {
        if (dontStandardize(str)) {
            return str;
        }
        String preprocessProperName = preprocessProperName(str);
        String lowerCase = preprocessProperName.toLowerCase();
        List<String> nearSearch = nameTrie.nearSearch(lowerCase, 2);
        SortedArrayList sortedArrayList = new SortedArrayList();
        if (nearSearch.size() > 0) {
            for (int i = 0; i < nearSearch.size(); i++) {
                sortedArrayList.add(new ScoredString(nearSearch.get(i), LetterPairSimilarity.letterPairSimilarity(lowerCase, nearSearch.get(i))));
            }
        } else {
            List<String> nearSearch2 = consonantTrie.nearSearch(StringUtils.stripChars(lowerCase, "aeiouy"), 3);
            for (int i2 = 0; i2 < nearSearch2.size(); i2++) {
                for (String str2 : (Set) consonantTrie.get(nearSearch2.get(i2))) {
                    double letterPairSimilarity = LetterPairSimilarity.letterPairSimilarity(lowerCase, str2);
                    if (letterPairSimilarity >= 0.75d) {
                        sortedArrayList.add(new ScoredString(str2, letterPairSimilarity));
                    }
                }
            }
        }
        if (sortedArrayList.size() > 0) {
            preprocessProperName = CharUtils.makeCaseMatch(((ScoredString) sortedArrayList.get(0)).getString(), str);
        }
        return preprocessProperName;
    }

    @Override // edu.northwestern.at.utils.corpuslinguistics.namestandardizer.NameStandardizer
    public String preprocessProperName(String str) {
        return str;
    }

    @Override // edu.northwestern.at.utils.logger.UsesLogger
    public Logger getLogger() {
        return this.logger;
    }

    @Override // edu.northwestern.at.utils.logger.UsesLogger
    public void setLogger(Logger logger) {
        this.logger = logger;
    }
}
