package edu.northwestern.at.utils.corpuslinguistics.tokenizer;

import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.StringUtils;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/northwestern/at/utils/corpuslinguistics/tokenizer/EEBOWordTokenizer.class */
/**
 * Word tokenizer whose {@link #preprocessToken(String, List)} normalizes the
 * marker characters handled below (vertical bar, plus sign, leading
 * underscore) and splits tokens of the form {@code <digits>.<letters>}.
 *
 * <p>{@code preprocessToken} creates its own {@link Matcher} instances on each
 * call instead of using the shared static matchers, because a {@code Matcher}
 * carries mutable match state and is not safe for concurrent use.
 */
public class EEBOWordTokenizer extends DefaultWordTokenizer implements WordTokenizer {
    /**
     * Matches one or more digits, a period, then one or more letters.
     * Group 1 is the digit run; group 2 is the complete letter run (the
     * quantifier is inside the group so the whole run is captured, not just
     * its final letter).
     */
    protected static final Pattern numberDotSpellingPattern = Pattern.compile("(\\d+)\\.(\\p{L}+)");

    /**
     * Shared matcher kept only for source compatibility with subclasses.
     * Not used by {@link #preprocessToken(String, List)}.
     */
    protected static final Matcher numberDotSpellingMatcher = numberDotSpellingPattern.matcher("");

    /** Matches a leading underscore followed by two ASCII capital letters. */
    protected static Pattern underlineCapCapPattern = Pattern.compile("^_([ABCDEFGHIJKLMNOPQRSTUVWXYZ])([ABCDEFGHIJKLMNOPQRSTUVWXYZ])");

    /**
     * Shared matcher kept only for source compatibility with subclasses.
     * Not used by {@link #preprocessToken(String, List)}.
     */
    protected static final Matcher underlineCapCapMatcher = underlineCapCapPattern.matcher("");

    /**
     * Normalizes a single token before it is added to the token list.
     *
     * <p>Steps, applied in order:
     * <ol>
     *   <li>Unless the token is exactly the vertical bar string, every
     *       vertical bar is removed.</li>
     *   <li>For tokens longer than one character: if the token does not start
     *       with {@code '+'}, every {@code '+'} is removed. If it does start
     *       with {@code '+'} and {@code CharUtils.isPunctuationOrSymbol}
     *       returns false for it, the leading {@code '+'} is dropped and the
     *       last element of {@code list} (if any) is removed and prepended to
     *       the token.</li>
     *   <li>A token starting with {@code '_'} followed by two ASCII capitals
     *       loses the underscore and has its second capital lower-cased.</li>
     *   <li>A token consisting entirely of digits, a period and letters is
     *       split: {@code "<digits>."} is appended to {@code list} and the
     *       letters become the token.</li>
     * </ol>
     *
     * @param str  the token text to preprocess; must not be {@code null}
     * @param list token list that may be modified as described above
     *             (presumably the tokens emitted so far; confirm against the
     *             caller)
     * @return the preprocessed token text
     */
    @Override // edu.northwestern.at.utils.corpuslinguistics.tokenizer.AbstractWordTokenizer, edu.northwestern.at.utils.corpuslinguistics.tokenizer.WordTokenizer
    public String preprocessToken(String str, List<String> list) {
        String token = str;

        // A lone vertical bar is kept as-is; otherwise all bars are stripped.
        if (!token.equals(CharUtils.VERTICAL_BAR_STRING)) {
            token = StringUtils.replaceAll(str, CharUtils.VERTICAL_BAR_STRING, "");
        }

        if (token.length() > 1) {
            if (token.charAt(0) != '+') {
                token = StringUtils.replaceAll(token, "+", "");
            } else if (!CharUtils.isPunctuationOrSymbol(token)) {
                // Leading '+': glue this token onto the previous list entry.
                token = token.substring(1);
                if (!list.isEmpty()) {
                    int lastIndex = list.size() - 1;
                    token = list.get(lastIndex) + token;
                    list.remove(lastIndex);
                }
            }
        }

        if (token.length() > 1 && token.charAt(0) == '_') {
            // Per-call matcher: the shared static Matcher is not thread-safe.
            Matcher capCap = underlineCapCapPattern.matcher(token);
            if (capCap.find()) {
                // Drop the underscore, keep the first capital, lower-case the
                // second; substring(3) is "" when nothing follows.
                token = String.valueOf(token.charAt(1))
                        + Character.toLowerCase(token.charAt(2))
                        + token.substring(3);
            }
        }

        if (token.length() > 2 && token.indexOf(".") > 0) {
            // Per-call matcher: the shared static Matcher is not thread-safe.
            Matcher numberDot = numberDotSpellingPattern.matcher(token);
            if (numberDot.matches()) {
                list.add(numberDot.group(1) + ".");
                token = numberDot.group(2);
            }
        }

        return token;
    }
}
