package edu.northwestern.at.utils.corpuslinguistics.stemmer;

import edu.northwestern.at.utils.CharUtils;
import java.util.Locale;
import java.util.Vector;

/* loaded from: input_file:edu/northwestern/at/utils/corpuslinguistics/stemmer/LancasterStemmer.class */
public class LancasterStemmer implements Stemmer {
    public static final String[] prefixes = {"intra", "kilo", "mega", "micro", "milli", "nano", "pico", "pseudo", "ultra"};
    public static final String[] defaultStemmingRules = {"ai*2.     { -ia > -   if intact }", "a*1.      { -a > -    if intact }", "bb1.      { -bb > -b   }", "city3s.   { -ytic > -ys }", "ci2>      { -ic > -    }", "cn1t>     { -nc > -nt  }", "dd1.      { -dd > -d   }", "dei3y>    { -ied > -y  }", "deec2ss.  { -ceed > -cess }", "dee1.     { -eed > -ee }", "de2>      { -ed > -    }", "dooh4>    { -hood > -  }", "e1>       { -e > -     }", "feil1v.   { -lief > -liev }", "fi2>      { -if > -    }", "gni3>     { -ing > -   }", "gai3y.    { -iag > -y  }", "ga2>      { -ag > -    }", "gg1.      { -gg > -g   }", "ht*2.     { -th > -   if intact }", "hsiug5ct. { -guish > -ct }", "hsi3>     { -ish > -   }", "i*1.      { -i > -    if intact }", "i1y>      { -i > -y    }", "ji1d.     { -ij > -id   --  see nois4j> & vis3j> }", "juf1s.    { -fuj > -fus }", "ju1d.     { -uj > -ud  }", "jo1d.     { -oj > -od  }", "jeh1r.    { -hej > -her }", "jrev1t.   { -verj > -vert }", "jsim2t.   { -misj > -mit }", "jn1d.     { -nj > -nd  }", "j1s.      { -j > -s    }", "lbaifi6.  { -ifiabl > - }", "lbai4y.   { -iabl > -y }", "lba3>     { -abl > -   }", "lbi3.     { -ibl > -   }", "lib2l>    { -bil > -bl }", "lc1.      { -cl > c    }", "lufi4y.   { -iful > -y }", "luf3>     { -ful > -   }", "lu2.      { -ul > -    }", "lai3>     { -ial > -   }", "lau3>     { -ual > -   }", "la2>      { -al > -    }", "ll1.      { -ll > -l   }", "mui3.     { -ium > -   }", "mu*2.     { -um > -   if intact }", "msi3>     { -ism > -   }", "mm1.      { -mm > -m   }", "nois4j>   { -sion > -j }", "noix4ct.  { -xion > -ct }", "noi3>     { -ion > -   }", "nai3>     { -ian > -   }", "na2>      { -an > -    }", "nee0.     { protect  -een }", "ne2>      { -en > -    }", "nn1.      { -nn > -n   }", "pihs4>    { -ship > -  }", "pp1.      { -pp > -p   }", "re2>      { -er > -    }", "rae0.     { protect  -ear }", "ra2.      { -ar > -    }", "ro2>      { -or > -    }", "ru2>      { -ur > -    }", "rr1.      
{ -rr > -r   }", "rt1>      { -tr > -t   }", "rei3y>    { -ier > -y  }", "sei3y>    { -ies > -y  }", "sis2.     { -sis > -s  }", "si2>      { -is > -    }", "ssen4>    { -ness > -  }", "ss0.      { protect  -ss }", "suo3>     { -ous > -   }", "su*2.     { -us > -   if intact }", "s*1>      { -s > -    if intact }", "s0.       { -s > -s    }", "tacilp4y. { -plicat > -ply }", "ta2>      { -at > -    }", "tnem4>    { -ment > -  }", "tne3>     { -ent > -   }", "tna3>     { -ant > -   }", "tpir2b.   { -ript > -rib }", "tpro2b.   { -orpt > -orb }", "tcud1.    { -duct > -duc }", "tpmus2.   { -sumpt > -sum }", "tpec2iv.  { -cept > -ceiv }", "tulo2v.   { -olut > -olv }", "tsis0.    { protect  -sist }", "tsi3>     { -ist > -   }", "tt1.      { -tt > -t   }", "uqi3.     { -iqu > -   } ", "ugo1.     { -ogu > -og }", "vis3j>    { -siv > -j  }", "vie0.     { protect  -eiv }", "vi2>      { -iv > -    }", "ylb1>     { -bly > -bl }", "yli3y>    { -ily > -y  }", "ylp0.     { protect  -ply }", "yl2>      { -ly > -    }", "ygo1.     { -ogy > -og }", "yhp1.     { -phy > -ph }", "ymo1.     { -omy > -om }", "ypo1.     { -opy > -op }", "yti3>     { -ity > -   }", "yte3>     { -ety > -   }", "ytl2.     { -lty > -l  }", "yrtsi5.   { -istry > - }", "yra3>     { -ary > -   }", "yro3>     { -ory > -   }", "yfi3.     { -ify > -   }", "ycn2t>    { -ncy > -nt }", "yca3>     { -acy > -   }", "zi2>      { -iz > -    }", "zy1s.     { -yz > -ys  }", "end0."};
    protected static final char zeroDigit = '0';
    protected Vector<String> ruleTable;
    protected int[] ruleTableIndex;
    protected boolean preStrip;

    public LancasterStemmer() {
        this.preStrip = true;
        loadRules(defaultStemmingRules);
    }

    public LancasterStemmer(String[] strArr) {
        this.preStrip = true;
        loadRules(strArr);
    }

    public LancasterStemmer(String[] strArr, boolean z) {
        this.preStrip = z;
        loadRules(strArr);
    }

    protected void loadRules(String[] strArr) {
        this.ruleTable = new Vector<>();
        this.ruleTableIndex = new int[26];
        for (int i = 0; i < 25; i++) {
            this.ruleTableIndex[i] = 0;
        }
        for (String str : strArr) {
            this.ruleTable.addElement(str.replaceAll(" ", ""));
        }
        char c = 'a';
        for (int i2 = 0; i2 < strArr.length - 1; i2++) {
            while (this.ruleTable.elementAt(i2).charAt(0) != c) {
                c = (char) (c + 1);
                this.ruleTableIndex[charCode(c)] = i2;
            }
        }
    }

    protected int firstVowel(String str, int i) {
        char c = 'a';
        int i2 = 0;
        while (i2 < i && !vowel(str.charAt(i2), c)) {
            c = str.charAt(i2);
            i2++;
        }
        return Math.min(i2, i);
    }

    /* JADX WARN: Multi-variable type inference failed */
    protected String stripSuffixes(String str) {
        boolean z = false;
        int i = 0;
        boolean z2 = true;
        String clean = clean(str.toLowerCase());
        int i2 = 0;
        while (i2 + 1 < clean.length() && isLetter(clean.charAt(i2 + 1))) {
            i2++;
        }
        if (i2 < 1) {
            z = -1;
        } else {
            i = firstVowel(clean, i2);
            int length = clean.length() - 1;
        }
        while (z != -1) {
            z = false;
            char charAt = clean.charAt(i2);
            int i3 = (!isLetter(charAt) || charAt < 'a' || charAt > 'z') ? -1 : this.ruleTableIndex[charCode(charAt)];
            if (i3 == -1) {
                z = -1;
            } else {
                String elementAt = this.ruleTable.elementAt(i3);
                while (!z) {
                    boolean z3 = false;
                    if (elementAt.charAt(0) != charAt) {
                        z = -1;
                        z3 = -1;
                    }
                    int i4 = 1;
                    int i5 = i2 - 1;
                    while (!z3) {
                        if (isDigit(elementAt.charAt(i4))) {
                            z3 = true;
                        } else if (elementAt.charAt(i4) == '*') {
                            if (z2) {
                                i4++;
                                z3 = true;
                            } else {
                                z3 = -1;
                            }
                        } else if (elementAt.charAt(i4) != clean.charAt(i5)) {
                            z3 = -1;
                        } else if (i5 <= i) {
                            z3 = -1;
                        } else {
                            i4++;
                            i5--;
                        }
                    }
                    if (z3) {
                        int i6 = 0;
                        while (true) {
                            if (elementAt.charAt(i4 + i6 + 1) >= '.' && elementAt.charAt(i4 + i6 + 1) <= '>') {
                                break;
                            }
                            i6++;
                        }
                        int charAt2 = ((i2 + i6) + zeroDigit) - elementAt.charAt(i4);
                        if (i == 0) {
                            if (charAt2 < 1) {
                                z3 = -1;
                            }
                        } else if (charAt2 < 2 || charAt2 < i) {
                            z3 = -1;
                        }
                    }
                    if (z3) {
                        z2 = false;
                        i2 = (i2 + zeroDigit) - elementAt.charAt(i4);
                        int i7 = i4 + 1;
                        clean = clean.substring(0, i2 + 1);
                        while (i7 < elementAt.length() && isLetter(elementAt.charAt(i7))) {
                            clean = clean + elementAt.charAt(i7);
                            i7++;
                            i2++;
                        }
                        z = elementAt.charAt(i7) == '.' ? -1 : true;
                    } else {
                        i3++;
                        elementAt = this.ruleTable.elementAt(i3);
                        if (elementAt.charAt(0) != charAt) {
                            z = -1;
                        }
                    }
                }
            }
        }
        return clean;
    }

    protected boolean vowel(char c, char c2) {
        boolean isEnglishVowel = CharUtils.isEnglishVowel(c);
        if (!isEnglishVowel && c == 'y') {
            isEnglishVowel = !CharUtils.isEnglishVowel(c2);
        }
        return isEnglishVowel;
    }

    protected boolean isDigit(char c) {
        return CharUtils.isDigit(c);
    }

    protected boolean isLetter(char c) {
        return CharUtils.isLetter(c);
    }

    protected int charCode(char c) {
        return c - 'a';
    }

    protected String stripPrefixes(String str) {
        String str2 = str;
        String lowerCase = str.toLowerCase();
        int i = 0;
        while (true) {
            if (i < prefixes.length) {
                if (lowerCase.startsWith(prefixes[i]) && lowerCase.length() > prefixes[i].length()) {
                    str2 = str.substring(prefixes[i].length());
                    break;
                }
                i++;
            } else {
                break;
            }
        }
        return str2;
    }

    protected String clean(String str) {
        StringBuffer stringBuffer = new StringBuffer();
        for (int i = 0; i < str.length(); i++) {
            if (isLetter(str.charAt(i))) {
                stringBuffer.append(str.charAt(i));
            }
        }
        return stringBuffer.toString();
    }

    @Override // edu.northwestern.at.utils.corpuslinguistics.stemmer.Stemmer
    public String stem(String str) {
        String str2 = str;
        if (str2.length() > 3 && this.preStrip) {
            str2 = stripPrefixes(str2);
        }
        if (str2.length() > 3) {
            str2 = stripSuffixes(str2);
        }
        return str2;
    }
}
