package edu.northwestern.at.utils.corpuslinguistics.lemmatizer;

import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.Map2D;
import edu.northwestern.at.utils.Map2DFactory;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.UnicodeReader;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/* loaded from: input_file:edu/northwestern/at/utils/corpuslinguistics/lemmatizer/RuleBasedLemmatizer.class */
/**
 * Lemmatizer that combines a table of irregular forms with lists of
 * {@link LemmatizerRule}s, both grouped by word class.
 *
 * <p>Both tables are populated from text resources via {@link #loadRules(URL, String)}
 * and {@link #loadIrregularForms(URL, String)}. In those resources, blank lines and lines
 * whose first non-blank character is {@code '#'} are skipped, and a line whose first token
 * ends in {@code ':'} starts a new word class section named by that token without the colon.
 */
public class RuleBasedLemmatizer extends AbstractLemmatizer implements Lemmatizer {
    /** Irregular forms: (word class, spelling) mapped to lemma. */
    protected Map2D<String, String, String> irregularForms = Map2DFactory.createNewMap2D();

    /** Word class names encountered as section headers while loading irregular forms. */
    protected Set<String> irregularFormsWordClasses = new TreeSet<String>();

    /** Lemmatization rules, keyed by word class name, in the order they were loaded. */
    protected Map<String, List<LemmatizerRule>> rules = MapFactory.createNewMap();

    /** Word class names encountered as section headers while loading rules. */
    protected Set<String> rulesWordClasses = new TreeSet<String>();

    /**
     * Loads lemmatization rules from a text resource.
     *
     * <p>Every non-header line is handed, trimmed but otherwise whole, to
     * {@link DefaultLemmatizerRule} and appended to the current word class section.
     * Rules that appear before the first section header are stored under the empty
     * string key. A section that is loaded again under the same name replaces the
     * previously stored list.
     *
     * @param url resource to read the rules from
     * @param str second argument passed to {@code UnicodeReader}
     *            (presumably the character encoding name; confirm against that class)
     * @throws IOException if the resource cannot be opened or read
     */
    public void loadRules(URL url, String str) throws IOException {
        BufferedReader reader = new BufferedReader(new UnicodeReader(url.openStream(), str));
        try {
            String wordClass = "";
            List<LemmatizerRule> pendingRules = ListFactory.createNewList();
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String line = rawLine.trim();
                if (line.length() == 0 || line.charAt(0) == '#') {
                    continue;
                }
                String[] tokens = StringUtils.makeTokenArray(line);
                if (tokens.length == 0) {
                    continue;
                }
                String firstToken = tokens[0];
                // endsWith is safe even if the tokenizer ever yields an empty token.
                if (firstToken.endsWith(":")) {
                    // New section: flush the rules gathered for the previous one.
                    if (pendingRules.size() > 0) {
                        this.rules.put(wordClass, pendingRules);
                        pendingRules = ListFactory.createNewList();
                    }
                    wordClass = firstToken.substring(0, firstToken.length() - 1);
                    this.rulesWordClasses.add(wordClass);
                } else {
                    pendingRules.add(new DefaultLemmatizerRule(line));
                }
            }
            // Flush the final section.
            if (pendingRules.size() > 0) {
                this.rules.put(wordClass, pendingRules);
            }
        } finally {
            // Close on failure as well, so a read error does not leak the URL stream.
            reader.close();
        }
    }

    /**
     * Loads irregular forms from a text resource.
     *
     * <p>Within a word class section, each line's first token is the spelling and its
     * second token, when present, is the lemma; a line with a single token maps the
     * spelling to itself. Entries that appear before the first section header are stored
     * under the empty string word class.
     *
     * @param url resource to read the irregular forms from
     * @param str second argument passed to {@code UnicodeReader}
     *            (presumably the character encoding name; confirm against that class)
     * @throws IOException if the resource cannot be opened or read
     */
    public void loadIrregularForms(URL url, String str) throws IOException {
        BufferedReader reader = new BufferedReader(new UnicodeReader(url.openStream(), str));
        try {
            String wordClass = "";
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String line = rawLine.trim();
                if (line.length() == 0 || line.charAt(0) == '#') {
                    continue;
                }
                String[] tokens = StringUtils.makeTokenArray(line);
                if (tokens.length == 0) {
                    continue;
                }
                String firstToken = tokens[0];
                if (firstToken.endsWith(":")) {
                    wordClass = firstToken.substring(0, firstToken.length() - 1);
                    this.irregularFormsWordClasses.add(wordClass);
                } else {
                    String lemma = tokens.length > 1 ? tokens[1] : firstToken;
                    this.irregularForms.put(wordClass, firstToken, lemma);
                }
            }
        } finally {
            // Close on failure as well, so a read error does not leak the URL stream.
            reader.close();
        }
    }

    /**
     * Lemmatizes a spelling using the tables for the given word class(es).
     *
     * <p>The word class is trimmed and lower-cased. The irregular forms table is consulted
     * first, with the whole normalized word class string as key, trying the spelling as
     * given and then lower-cased. If neither is found, the normalized word class is split
     * on commas and, for each piece in turn, the first rule that changes the current lemma
     * is applied before moving on to the next piece.
     *
     * @param str  spelling to lemmatize
     * @param str2 word class, or a comma-separated list of word classes; when
     *             {@code null} or blank, {@link #lemmatize(String)} is used instead
     * @return the lemma with {@link #cleanUpLemma(String)} applied, or {@code str}
     *         unchanged when it cannot be lemmatized or the resulting lemma is empty
     */
    @Override // edu.northwestern.at.utils.corpuslinguistics.lemmatizer.AbstractLemmatizer, edu.northwestern.at.utils.corpuslinguistics.lemmatizer.Lemmatizer
    public String lemmatize(String str, String str2) {
        if (str2 == null) {
            return lemmatize(str);
        }
        String wordClass = str2.trim().toLowerCase();
        // Test the normalized value so that a whitespace-only word class also falls back.
        if (wordClass.length() == 0) {
            return lemmatize(str);
        }
        if (cantLemmatize(str)) {
            return str;
        }
        String[] wordClasses = wordClass.split(",");
        String lowered = str.toLowerCase();
        String lemma = this.irregularForms.get(wordClass, str);
        if (lemma == null) {
            lemma = this.irregularForms.get(wordClass, lowered);
        }
        if (lemma == null) {
            lemma = lowered;
            for (String singleClass : wordClasses) {
                List<LemmatizerRule> classRules = this.rules.get(singleClass);
                if (classRules == null) {
                    continue;
                }
                // Bounded iteration: stop at the first rule that changes the lemma, or
                // when the rules are exhausted (the previous loop never terminated in
                // the latter case).
                for (LemmatizerRule rule : classRules) {
                    String candidate = rule.apply(lemma, this.dictionary);
                    if (!candidate.equals(lemma)) {
                        lemma = candidate;
                        break;
                    }
                }
            }
        }
        if (lemma == null || lemma.length() == 0) {
            return str;
        }
        return cleanUpLemma(lemma);
    }

    /**
     * Post-processes a lemma by replacing every {@code "!"} with the empty string,
     * delegating to {@code StringUtils.replaceAll}.
     *
     * @param str lemma to clean up
     * @return the result of {@code StringUtils.replaceAll(str, "!", "")}
     */
    public String cleanUpLemma(String str) {
        return StringUtils.replaceAll(str, "!", "");
    }

    /**
     * Lemmatizes a spelling without a word class by trying every loaded word class.
     *
     * <p>The irregular forms of each known word class are consulted first, using the
     * spelling exactly as given; the first entry that differs from the spelling wins.
     * Otherwise the rules of each known word class are applied to the lower-cased
     * spelling, and the first rule that changes it wins. If nothing matches, the spelling
     * itself is returned after clean-up.
     *
     * @param str spelling to lemmatize
     * @return the lemma with {@link #cleanUpLemma(String)} applied, or {@code str}
     *         unchanged when it cannot be lemmatized
     */
    @Override // edu.northwestern.at.utils.corpuslinguistics.lemmatizer.AbstractLemmatizer, edu.northwestern.at.utils.corpuslinguistics.lemmatizer.Lemmatizer
    public String lemmatize(String str) {
        if (cantLemmatize(str)) {
            return str;
        }
        String lemma = str;
        for (String wordClass : this.irregularFormsWordClasses) {
            lemma = this.irregularForms.get(wordClass.toLowerCase(), str);
            if (lemma != null && !lemma.equals(str)) {
                return cleanUpLemma(lemma);
            }
        }
        String lowered = str.toLowerCase();
        for (String wordClass : this.rulesWordClasses) {
            List<LemmatizerRule> classRules = this.rules.get(wordClass.toLowerCase());
            if (classRules == null) {
                continue;
            }
            for (LemmatizerRule rule : classRules) {
                String candidate = rule.apply(lowered, this.dictionary);
                if (!candidate.equals(lowered)) {
                    return cleanUpLemma(candidate);
                }
            }
        }
        // The last irregular-form lookup may have left null behind; fall back to the input.
        if (lemma == null || lemma.length() == 0) {
            lemma = str;
        }
        return cleanUpLemma(lemma);
    }
}
