package cmu.arktweetnlp.impl.features;

import cmu.arktweetnlp.Twokenize;
import com.twitter.Regex;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:cmu/arktweetnlp/impl/features/FeatureUtil.class */
/**
 * Static helpers that canonicalise tokens before feature extraction: URLs and
 * e-mail addresses are folded into {@code <URL-base>} placeholders, mentions
 * into {@code <@MENTION>}, and {@link #fuzztoken(String, boolean)} produces a
 * set of loosened spellings of a token.
 *
 * <p>All lower-casing is done with {@link Locale#ROOT} so the output does not
 * depend on the JVM's default locale (e.g. Turkish dotted/dotless i).
 */
public class FeatureUtil {
    /**
     * Matches a whole token that is either of the two Twokenize patterns
     * (presumably an alternation of URL and e-mail; see {@code Twokenize.OR}).
     */
    public static Pattern URL = Pattern.compile(Twokenize.OR(Twokenize.url, Twokenize.Email));

    /**
     * Finds the first {@code alnum+ '.' (alnum|'.')+} run that does not start
     * with {@code www.}, {@code ww.}, {@code w.} or {@code @}.
     */
    public static Pattern justbase = Pattern.compile("(?!www\\.|ww\\.|w\\.|@)[a-zA-Z0-9]+\\.[A-Za-z0-9\\.]+");

    /** A word character immediately repeated one or more times. */
    static Pattern repeatchar = Pattern.compile("([\\w])\\1{1,}");

    /** A lower-case vowel immediately repeated one or more times. */
    static Pattern repeatvowel = Pattern.compile("(a|e|i|o|u)\\1+");

    // Precompiled forms of the regexes fuzztoken previously handed to
    // String.replaceAll / String.split on every call.
    private static final Pattern SINGLE_QUOTE_VARIANTS = Pattern.compile("[‘’´`]");
    private static final Pattern DOUBLE_QUOTE_VARIANTS = Pattern.compile("[“”]");
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{Punct}");

    /**
     * Applies {@link #normalize(String)} to every element of {@code list}.
     *
     * @param list tokens to normalise
     * @return a new list of the same length and order holding the normalised tokens
     */
    public static ArrayList<String> normalize(List<String> list) {
        ArrayList<String> normalized = new ArrayList<>(list.size());
        for (String token : list) {
            normalized.add(normalize(token));
        }
        return normalized;
    }

    /**
     * Lower-cases {@code str} and then replaces it by a placeholder when it is
     * a URL/e-mail or a mention.
     *
     * @param str the raw token
     * @return {@code <URL-base>} when the lower-cased token fully matches
     *         {@link #URL}, {@code <@MENTION>} when it fully matches the
     *         mention-or-list pattern, otherwise the lower-cased token
     */
    public static String normalize(String str) {
        return replaceSpecialToken(str.toLowerCase(Locale.ROOT));
    }

    /**
     * Applies {@link #normalizecap(String)} to every element of {@code list}.
     *
     * @param list tokens to normalise
     * @return a new list of the same length and order holding the normalised tokens
     */
    public static ArrayList<String> normalizecap(List<String> list) {
        ArrayList<String> normalized = new ArrayList<>(list.size());
        for (String token : list) {
            normalized.add(normalizecap(token));
        }
        return normalized;
    }

    /**
     * Case-preserving variant of {@link #normalize(String)}: the token itself
     * is not lower-cased, only the base inside a {@code <URL-base>} placeholder is.
     *
     * @param str the raw token
     * @return {@code <URL-base>} when the token fully matches {@link #URL},
     *         {@code <@MENTION>} when it fully matches the mention-or-list
     *         pattern, otherwise {@code str} unchanged
     */
    public static String normalizecap(String str) {
        return replaceSpecialToken(str);
    }

    /**
     * Builds a list of loosened spellings of {@code str}, in this order:
     * <ol>
     *   <li>{@code str} with curly single quotes, acute accent and backtick
     *       replaced by {@code '} and curly double quotes by {@code "};</li>
     *   <li>{@code str} itself;</li>
     *   <li>{@code str} with every run of a repeated word character collapsed to one;</li>
     *   <li>the same, collapsed to two;</li>
     *   <li>{@code str} with every run of a repeated lower-case vowel collapsed to one;</li>
     *   <li>only when {@code z} is true and {@code str} does not start with
     *       {@code <URL}: {@code str} with all punctuation removed, followed by
     *       the pieces obtained by splitting {@code str} on each punctuation
     *       character.</li>
     * </ol>
     * Duplicates are not removed.
     *
     * @param str the token to expand
     * @param z   whether to add the punctuation-stripped and punctuation-split forms
     * @return a new list of variants
     */
    public static ArrayList<String> fuzztoken(String str, boolean z) {
        ArrayList<String> variants = new ArrayList<>();
        String asciiSingleQuotes = SINGLE_QUOTE_VARIANTS.matcher(str).replaceAll("'");
        variants.add(DOUBLE_QUOTE_VARIANTS.matcher(asciiSingleQuotes).replaceAll("\""));
        variants.add(str);
        variants.add(repeatchar.matcher(str).replaceAll("$1"));
        variants.add(repeatchar.matcher(str).replaceAll("$1$1"));
        variants.add(repeatvowel.matcher(str).replaceAll("$1"));
        if (z && !str.startsWith("<URL")) {
            variants.add(PUNCTUATION.matcher(str).replaceAll(""));
            // The split can yield empty strings (leading or adjacent
            // punctuation); they are kept so the output stays identical to
            // the String.split call this replaces.
            variants.addAll(Arrays.asList(PUNCTUATION.split(str)));
        }
        return variants;
    }

    /** Shared tail of normalize/normalizecap: URL placeholder, mention placeholder, or the token as given. */
    private static String replaceSpecialToken(String token) {
        if (URL.matcher(token).matches()) {
            return urlPlaceholder(token);
        }
        if (Regex.VALID_MENTION_OR_LIST.matcher(token).matches()) {
            return "<@MENTION>";
        }
        return token;
    }

    /** Builds {@code <URL-base>}; base is the first justbase match, lower-cased, or empty when none is found. */
    private static String urlPlaceholder(String token) {
        Matcher baseMatcher = justbase.matcher(token);
        String base = baseMatcher.find() ? baseMatcher.group().toLowerCase(Locale.ROOT) : "";
        return "<URL-" + base + ">";
    }
}
