package LBJ2.nlp;

import LBJ2.learn.WekaWrapper;
import LBJ2.parse.LinkedChild;
import LBJ2.parse.LinkedVector;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:LBJ2/nlp/Sentence.class */
/**
 * A piece of raw text that can be tokenized into {@link Word}s with
 * {@link #wordSplit()}.
 *
 * <p>Tokenization works by collecting "boundary" offsets into {@link #text}:
 * every word contributes the index of its first character and the index of
 * its last character.  The individual rules (whitespace, commas, apostrophes,
 * colons, slashes, dashes, dollar signs, ellipses, the final period and other
 * punctuation) each add boundaries independently; the offsets are then sorted
 * and consumed pairwise as (first, last) word spans.
 */
public class Sentence extends LinkedChild {
    /** URL schemes recognized in front of {@code ://}. */
    private static final String[] PROTOCOLS = {"telnet", "https", "file", "http", "nntp", "smtp"};

    /**
     * Top level domains recognized after a dot.  The order is significant:
     * regex alternation takes the first alternative that matches, so longer
     * names are listed before the two-letter codes they start with.
     */
    private static final String[] TOP_LEVEL_DOMAINS = {
        "museum", "travel", "aero", "arpa", "coop", "info", "jobs", "name", "biz", "com", "edu", "gov", "int", "mil", "net", "org", "pro",
        "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au", "aw", "az",
        "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "br", "bs", "bt", "bv", "bw", "by", "bz",
        "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv", "cx", "cy", "cz",
        "de", "dj", "dk", "dm", "do", "dz",
        "ec", "ee", "eg", "er", "es", "et", "eu",
        "fi", "fj", "fk", "fm", "fo", "fr",
        "ga", "gb", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy",
        "hk", "hm", "hn", "hr", "ht", "hu",
        "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it",
        "je", "jm", "jo", "jp",
        "ke", "kg", "kh", "ki", "km", "kn", "kr", "kw", "ky", "kz",
        "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly",
        "ma", "mc", "md", "mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", "mz",
        "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz",
        "om",
        "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py",
        "qa",
        "re", "ro", "ru", "rw",
        "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk", "sl", "sm", "sn", "so", "sr", "st", "su", "sv", "sy", "sz",
        "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt", "tv", "tw", "tz",
        "ua", "ug", "uk", "um", "us", "uy", "uz",
        "va", "vc", "ve", "vg", "vi", "vn", "vu",
        "wf", "ws",
        "ye", "yt", "yu",
        "za", "zm", "zw"
    };

    /**
     * Matches URL-like spans: {@code scheme://...} or {@code host.tld[/path]}.
     * Built once from the two tables above (which must be declared first).
     */
    private static final Pattern URL_PATTERN = Pattern.compile(buildUrlRegex());

    // Compiled once; Pattern instances are immutable and safe to share.
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    private static final Pattern NON_SPACE_NON_DIGIT = Pattern.compile("[^\\s\\d]");
    private static final Pattern DIGIT = Pattern.compile("\\d");
    private static final Pattern DIGIT_COMMA_NON_DIGIT = Pattern.compile("\\d,\\D");
    private static final Pattern NON_DIGIT_COMMA_DIGIT = Pattern.compile("\\D,\\d");
    private static final Pattern APOSTROPHE_NEIGHBOR = Pattern.compile("[^\\s,']");
    private static final Pattern COLON_NEIGHBOR = Pattern.compile("[^\\s,':]");
    private static final Pattern DIGIT_COLON_DIGIT = Pattern.compile("\\d:\\d");
    private static final Pattern SLASH_NEIGHBOR = Pattern.compile("[^\\s,':/]");
    private static final Pattern DIGIT_SLASH_DIGIT = Pattern.compile("\\d/\\d");
    private static final Pattern DASH_NEIGHBOR = Pattern.compile("[^\\s,':/-]");
    private static final Pattern DIGIT_DASH_DIGIT = Pattern.compile("\\d-\\d");
    private static final Pattern LEADING_NEGATIVE = Pattern.compile("-\\.?\\d");
    private static final Pattern SPACED_NEGATIVE = Pattern.compile("\\s-\\.?\\d");
    private static final Pattern DOLLAR_NEIGHBOR = Pattern.compile("[^\\s,':/\\$-]");
    private static final Pattern LEADING_AMOUNT = Pattern.compile("\\$\\.?\\d");
    private static final Pattern PRECEDED_AMOUNT = Pattern.compile("(\\s|-)\\$\\.?\\d");
    private static final Pattern TOKEN_THEN_ELLIPSIS = Pattern.compile("[^\\s,':/\\$\\.-]\\.\\.\\.");
    private static final Pattern ELLIPSIS_THEN_TOKEN = Pattern.compile("\\.\\.\\.[^\\s,':/\\$\\.-]");
    private static final Pattern OTHER_PUNCTUATION = Pattern.compile("[^\\s\\w,'\\.:/\\$-]");
    private static final Pattern PUNCTUATION_WORD_EDGE =
            Pattern.compile("[^\\s\\w,'\\.:/\\$-]\\w|\\w[^\\s\\w,'\\.:/\\$-]");

    /**
     * Per-character "is inside a URL" flags for {@link #text}.  Filled lazily
     * by {@link #partOfURL(int)} and discarded at the end of every
     * {@link #wordSplit()} call; {@code null} means "not computed".
     */
    private boolean[] inURL;

    /** The raw text of this sentence. */
    public String text;

    /**
     * Creates a sentence over the given text.
     *
     * @param str the raw text
     */
    public Sentence(String str) {
        this.text = str;
    }

    /**
     * Creates a sentence over the given text, forwarding {@code i} and
     * {@code i2} to the {@code LinkedChild} constructor (presumably the start
     * and end offsets of the sentence in its document -- confirm against
     * {@code LinkedChild}).
     *
     * @param str the raw text
     * @param i   first value passed to the superclass constructor
     * @param i2  second value passed to the superclass constructor
     */
    public Sentence(String str, int i, int i2) {
        super(i, i2);
        this.text = str;
    }

    /**
     * Splits {@link #text} into words.
     *
     * <p>Each word's offsets are shifted by this sentence's inherited
     * {@code start} value.  The words are chained together and the returned
     * vector is constructed from the last word of the chain.
     *
     * <p>NOTE: text that is empty or consists only of whitespace yields no
     * usable boundaries and fails with an {@link IndexOutOfBoundsException}.
     *
     * @return a {@code LinkedVector} built from the words of this sentence
     */
    public LinkedVector wordSplit() {
        // Kept as a raw list of boxed Integers to stay consistent with the
        // pre-generics style of the surrounding code base.
        LinkedList boundaries = new LinkedList();
        try {
            addWhitespaceAndEdgeBoundaries(boundaries);
            addCommaBoundaries(boundaries);
            addApostropheBoundaries(boundaries);
            addColonBoundaries(boundaries);
            addSlashBoundaries(boundaries);
            addDashBoundaries(boundaries);
            addDollarBoundaries(boundaries);
            addEllipsisBoundaries(boundaries);
            int finalPeriod = addFinalPeriodBoundaries(boundaries);
            addPunctuationBoundaries(boundaries, finalPeriod);
            return buildWords(boundaries);
        } finally {
            // The URL flags describe the current text only; never let them
            // outlive this call, even when tokenization fails.
            this.inURL = null;
        }
    }

    /** Records a single word-start or word-end offset. */
    private void addBoundary(LinkedList boundaries, int offset) {
        boundaries.add(new Integer(offset));
    }

    /** Records a word break between {@code index} and {@code index + 1}. */
    private void addBreakAfter(LinkedList boundaries, int index) {
        addBoundary(boundaries, index);      // last character of the left word
        addBoundary(boundaries, index + 1);  // first character of the right word
    }

    /** Tells whether {@code pattern} is found inside {@code text[from, to)}. */
    private boolean found(Pattern pattern, int from, int to) {
        return pattern.matcher(this.text.substring(from, to)).find();
    }

    /**
     * Separates the symbol at {@code index} from the character on either
     * side of it, wherever that character matches {@code neighbor}.
     */
    private void addNeighborBreaks(LinkedList boundaries, int index, Pattern neighbor) {
        if (index >= 1 && found(neighbor, index - 1, index)) {
            addBreakAfter(boundaries, index - 1);
        }
        if (index + 1 < this.text.length() && found(neighbor, index + 1, index + 2)) {
            addBreakAfter(boundaries, index);
        }
    }

    /**
     * Adds a boundary pair around every run of whitespace, then makes sure
     * the text starts with a word start and ends with a word end.  Must run
     * first: it relies on the list containing only whitespace boundaries.
     */
    private void addWhitespaceAndEdgeBoundaries(LinkedList boundaries) {
        Matcher gaps = WHITESPACE.matcher(this.text);
        while (gaps.find()) {
            addBoundary(boundaries, gaps.start() - 1);  // end of the word before the gap
            addBoundary(boundaries, gaps.end());        // start of the word after the gap
        }

        Integer first = null;
        Integer last = null;
        if (boundaries.size() > 0) {
            first = (Integer) boundaries.getFirst();
            last = (Integer) boundaries.getLast();
        }

        if (first == null || first.intValue() != -1) {
            addBoundary(boundaries, 0);
        } else {
            // Leading whitespace: there is no word ending at index -1.
            boundaries.removeFirst();
        }

        if (last == null || last.intValue() < this.text.length()) {
            addBoundary(boundaries, this.text.length() - 1);
        } else {
            // Trailing whitespace: there is no word starting at text.length().
            // Remove by value, not position: a start boundary (0) may have
            // been appended after it just above, so it need not be last.
            boundaries.remove((Object) last);
        }
    }

    /**
     * Separates commas from adjacent text, except where a comma sits between
     * two digits (as in "1,000").
     */
    private void addCommaBoundaries(LinkedList boundaries) {
        int length = this.text.length();
        for (int i = this.text.indexOf(','); i != -1; i = this.text.indexOf(',', i + 1)) {
            boolean splitLeft = i > 0
                    && (found(NON_SPACE_NON_DIGIT, i - 1, i)
                        || (i + 1 == length && found(DIGIT, i - 1, i))
                        || (i + 1 < length && found(DIGIT_COMMA_NON_DIGIT, i - 1, i + 2)));
            if (splitLeft) {
                addBreakAfter(boundaries, i - 1);
            }

            boolean splitRight = i + 1 < length
                    && (found(NON_SPACE_NON_DIGIT, i + 1, i + 2)
                        || (i == 0 && found(DIGIT, i + 1, i + 2))
                        || (i > 0 && found(NON_DIGIT_COMMA_DIGIT, i - 1, i + 2)));
            if (splitRight) {
                addBreakAfter(boundaries, i);
            }
        }
    }

    /**
     * Handles apostrophes: splits off "n't" and the clitics 'd, 'm, 's, 'll,
     * 're, 've, plural possessives, and doubled apostrophes.
     */
    private void addApostropheBoundaries(LinkedList boundaries) {
        int length = this.text.length();
        for (int i = this.text.indexOf('\''); i != -1; i = this.text.indexOf('\'', i + 1)) {
            if (isNegationContraction(i)) {
                // "didn't" -> "did" | "n't": the break goes before the 'n'.
                addBreakAfter(boundaries, i - 2);
            } else if (breaksBeforeApostrophe(i)) {
                addBreakAfter(boundaries, i - 1);
            }

            // Break after the apostrophe when a non-letter follows, or when
            // this is the second of two consecutive apostrophes.
            if (i + 1 < length
                    && found(APOSTROPHE_NEIGHBOR, i + 1, i + 2)
                    && (!Character.isLetter(this.text.charAt(i + 1))
                        || (i > 0 && this.text.charAt(i - 1) == '\''))) {
                addBreakAfter(boundaries, i);
            }
        }
    }

    /**
     * Tells whether a word ends just before {@code index}: either the text
     * ends there or the character there is neither a letter nor an
     * apostrophe.  Requires {@code index <= text.length()}.
     */
    private boolean endsWordAt(int index) {
        if (index == this.text.length()) {
            return true;
        }
        char c = this.text.charAt(index);
        return !Character.isLetter(c) && c != '\'';
    }

    /** Tells whether the apostrophe at {@code i} is the one in a word-final "&lt;letter&gt;n't". */
    private boolean isNegationContraction(int i) {
        return i - 1 > 0
                && Character.isLetter(this.text.charAt(i - 2))
                && this.text.charAt(i - 1) == 'n'
                && i + 1 < this.text.length()
                && this.text.charAt(i + 1) == 't'
                && endsWordAt(i + 2);
    }

    /** Tells whether a word break belongs immediately before the apostrophe at {@code i}. */
    private boolean breaksBeforeApostrophe(int i) {
        if (i <= 0) {
            return false;
        }
        int length = this.text.length();
        char previous = this.text.charAt(i - 1);

        // First of two consecutive apostrophes following a token character.
        if (found(APOSTROPHE_NEIGHBOR, i - 1, i) && i + 1 < length && this.text.charAt(i + 1) == '\'') {
            return true;
        }
        // Word-final "s'" (plural possessive).
        if (previous == 's' && endsWordAt(i + 1)) {
            return true;
        }
        if (Character.isLetter(previous)) {
            // Word-final 'd, 'm or 's after a letter.
            if (i + 1 < length && endsWordAt(i + 2)) {
                char next = this.text.charAt(i + 1);
                if (next == 'd' || next == 'm' || next == 's') {
                    return true;
                }
            }
            // Word-final 'll, 're or 've after a letter.
            if (i + 2 < length && endsWordAt(i + 3)) {
                String suffix = this.text.substring(i + 1, i + 3);
                if (suffix.equals("ll") || suffix.equals("re") || suffix.equals("ve")) {
                    return true;
                }
            }
        }
        // Word-final 's after "<letter>." (possessive of an abbreviation).
        return previous == '.'
                && i - 1 > 0
                && Character.isLetter(this.text.charAt(i - 2))
                && i + 1 < length
                && endsWordAt(i + 2)
                && this.text.charAt(i + 1) == 's';
    }

    /**
     * Tells whether the five characters starting at {@code from} are the
     * tail of a URL scheme, {@code "tp://"} or {@code "TP://"}.
     */
    private boolean isSchemeSeparator(int from) {
        String window = this.text.substring(from, from + 5);
        return window.equals("tp://") || window.equals("TP://");
    }

    /**
     * Separates colons from adjacent text, except between digits (as in
     * "10:30"), in "tp://" and inside URLs.
     */
    private void addColonBoundaries(LinkedList boundaries) {
        int length = this.text.length();
        for (int i = this.text.indexOf(':'); i != -1; i = this.text.indexOf(':', i + 1)) {
            boolean betweenDigits = i >= 2 && i + 2 < length && found(DIGIT_COLON_DIGIT, i - 2, i + 3);
            boolean inScheme = i > 2 && i + 2 < length && isSchemeSeparator(i - 2);
            if (betweenDigits || inScheme || partOfURL(i)) {
                continue;
            }
            addNeighborBreaks(boundaries, i, COLON_NEIGHBOR);
        }
    }

    /**
     * Separates slashes from adjacent text, except between digits (as in
     * "1/2"), in "tp://" and inside URLs.
     */
    private void addSlashBoundaries(LinkedList boundaries) {
        int length = this.text.length();
        for (int i = this.text.indexOf('/'); i != -1; i = this.text.indexOf('/', i + 1)) {
            boolean betweenDigits = i >= 2 && i + 2 < length && found(DIGIT_SLASH_DIGIT, i - 2, i + 3);
            boolean firstSchemeSlash = i > 3 && i + 1 < length && isSchemeSeparator(i - 3);
            boolean secondSchemeSlash = i > 4 && isSchemeSeparator(i - 4);
            if (betweenDigits || firstSchemeSlash || secondSchemeSlash || partOfURL(i)) {
                continue;
            }
            addNeighborBreaks(boundaries, i, SLASH_NEIGHBOR);
        }
    }

    /**
     * Separates dashes from adjacent text, except between digits, in front
     * of a number at the start of the text or after whitespace (a negative
     * number), and inside URLs.
     */
    private void addDashBoundaries(LinkedList boundaries) {
        int length = this.text.length();
        for (int i = this.text.indexOf('-'); i != -1; i = this.text.indexOf('-', i + 1)) {
            boolean numeric = i + 2 < length
                    && ((i >= 2 && found(DIGIT_DASH_DIGIT, i - 2, i + 3))
                        || (i == 0 && found(LEADING_NEGATIVE, i, i + 3))
                        || (i > 0 && found(SPACED_NEGATIVE, i - 1, i + 3)));
            if (numeric || partOfURL(i)) {
                continue;
            }
            addNeighborBreaks(boundaries, i, DASH_NEIGHBOR);
        }
    }

    /**
     * Separates dollar signs from adjacent text, except when the sign opens
     * an amount at the start of the text or right after whitespace or a
     * dash, and inside URLs.
     */
    private void addDollarBoundaries(LinkedList boundaries) {
        int length = this.text.length();
        for (int i = this.text.indexOf('$'); i != -1; i = this.text.indexOf('$', i + 1)) {
            boolean amount = i + 2 < length
                    && ((i == 0 && found(LEADING_AMOUNT, i, i + 3))
                        || (i > 0 && found(PRECEDED_AMOUNT, i - 1, i + 3)));
            if (amount || partOfURL(i)) {
                continue;
            }
            addNeighborBreaks(boundaries, i, DOLLAR_NEIGHBOR);
        }
    }

    /** Separates a "..." from a token character directly before or after it. */
    private void addEllipsisBoundaries(LinkedList boundaries) {
        int length = this.text.length();
        for (int i = this.text.indexOf('.'); i != -1; i = this.text.indexOf('.', i + 1)) {
            // i is the first dot of "x...".
            if (i > 0 && i + 2 < length && found(TOKEN_THEN_ELLIPSIS, i - 1, i + 3)) {
                addBreakAfter(boundaries, i - 1);
            }
            // i is the last dot of "...x".
            if (i >= 2 && i + 1 < length && found(ELLIPSIS_THEN_TOKEN, i - 2, i + 2)) {
                addBreakAfter(boundaries, i);
            }
        }
    }

    /** Tells whether the dot at {@code period} is preceded by two more dots. */
    private boolean endsEllipsis(int period) {
        return period >= 2
                && this.text.charAt(period - 1) == '.'
                && this.text.charAt(period - 2) == '.';
    }

    /**
     * Separates the sentence-final period from its neighbors.  Only the last
     * '.' in the text is considered, and only when no letter or digit
     * follows it; a period that closes an ellipsis is left alone.
     *
     * @return the index of the sentence-final period, or -1 if there is none
     */
    private int addFinalPeriodBoundaries(LinkedList boundaries) {
        int period = this.text.lastIndexOf('.');
        if (period == -1) {
            return -1;
        }
        for (int k = period + 1; k < this.text.length(); k++) {
            if (Character.isLetterOrDigit(this.text.charAt(k))) {
                return -1;  // more words follow, so this is not a final period
            }
        }
        if (!endsEllipsis(period)) {
            addNeighborBreaks(boundaries, period, DOLLAR_NEIGHBOR);
        }
        return period;
    }

    /**
     * Separates remaining punctuation (anything that is not whitespace, a
     * word character, or one of {@code , ' . : / $ -}) from different
     * adjacent punctuation and from adjacent word characters, outside URLs.
     *
     * @param finalPeriod index of the sentence-final period or -1; the break
     *                    in front of it is left to the period rule
     */
    private void addPunctuationBoundaries(LinkedList boundaries, int finalPeriod) {
        int length = this.text.length();

        // Two different punctuation marks next to each other.
        Matcher marks = OTHER_PUNCTUATION.matcher(this.text);
        while (marks.find()) {
            int at = marks.start();
            if (!partOfURL(at)
                    && at + 1 < length
                    && this.text.charAt(at) != this.text.charAt(at + 1)
                    && at + 1 != finalPeriod
                    && found(OTHER_PUNCTUATION, at + 1, at + 2)) {
                addBreakAfter(boundaries, at);
            }
        }

        // A punctuation mark touching a word character, in either order.
        Matcher edges = PUNCTUATION_WORD_EDGE.matcher(this.text);
        while (edges.find()) {
            if (!partOfURL(edges.start())) {
                addBreakAfter(boundaries, edges.start());
            }
        }
    }

    /**
     * Sorts the collected offsets, reads them pairwise as (first, last)
     * character indices and links one {@link Word} per pair.
     */
    private LinkedVector buildWords(LinkedList boundaries) {
        Integer[] boxed = (Integer[]) boundaries.toArray(new Integer[boundaries.size()]);
        int[] offsets = new int[boxed.length];
        for (int k = 0; k < offsets.length; k++) {
            offsets[k] = boxed[k].intValue();
        }
        Arrays.sort(offsets);

        Word word = new Word(this.text.substring(offsets[0], offsets[1] + 1),
                offsets[0] + this.start, offsets[1] + this.start);
        for (int k = 2; k < offsets.length; k += 2) {
            word.next = new Word(this.text.substring(offsets[k], offsets[k + 1] + 1), word,
                    offsets[k] + this.start, offsets[k + 1] + this.start);
            word = (Word) word.next;
        }
        return new LinkedVector(word);
    }

    /**
     * Tells whether the character at index {@code i} of {@link #text} lies
     * inside a URL-like span.  The flags for the whole text are computed on
     * the first call and reused until {@link #wordSplit()} clears them.
     *
     * @param i an index into {@link #text}
     * @return {@code true} if that character is part of a URL match
     */
    private boolean partOfURL(int i) {
        if (this.inURL == null) {
            this.inURL = new boolean[this.text.length()];
            Matcher urls = URL_PATTERN.matcher(this.text);
            while (urls.find()) {
                Arrays.fill(this.inURL, urls.start(), urls.end(), true);
            }
        }
        return this.inURL[i];
    }

    /** Assembles the case-insensitive URL regex from the protocol and domain tables. */
    private static String buildUrlRegex() {
        StringBuffer regex = new StringBuffer("(?i)(");
        appendAlternatives(regex, PROTOCOLS);
        regex.append(")://\\S+|[a-zA-Z0-9][a-zA-Z0-9-]*\\.(");
        appendAlternatives(regex, TOP_LEVEL_DOMAINS);
        regex.append(")(/\\S+)?");
        return regex.toString();
    }

    /** Appends {@code options} to {@code regex} joined by '|', each exactly once. */
    private static void appendAlternatives(StringBuffer regex, String[] options) {
        for (int k = 0; k < options.length; k++) {
            if (k > 0) {
                regex.append('|');
            }
            regex.append(options[k]);
        }
    }

    /**
     * Returns the raw text of this sentence.
     *
     * @return {@link #text}
     */
    public String toString() {
        return this.text;
    }
}
