package org.sante.lucene;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;

/* loaded from: input_file:org/sante/lucene/NLPUtils.class */
/**
 * Static text helpers built on Lucene's {@link StandardTokenizer} and on
 * commons-lang {@link StringUtils}.
 *
 * <p>The class extends {@link StringUtils}, so the inherited static helpers are
 * reachable through {@code NLPUtils} as well.
 */
public class NLPUtils extends StringUtils {

    /**
     * Processing steps accepted by {@link #tokens(String, Filter...)}.
     *
     * <p>{@link #JAVA_NAME_CONVENTION}, {@link #YAGO}, {@link #UNDERSCORE} and
     * {@link #URI_PATH} rewrite the input text before tokenization and are always
     * applied in that fixed order. {@link #ACCENT}, {@link #STOP_WORD},
     * {@link #STEM} and {@link #LOWER_CASE} wrap the tokenizer in a Lucene token
     * filter, in the order the caller listed them.
     */
    public enum Filter {
        /** Drops tokens contained in {@code EnglishAnalyzer.ENGLISH_STOP_WORDS_SET}. */
        STOP_WORD,
        /** Applies {@link PorterStemFilter}. */
        STEM,
        /** Applies {@link LowerCaseFilter}. */
        LOWER_CASE,
        /** Replaces {@code '.'} and {@code '/'} with spaces before tokenizing. */
        URI_PATH,
        /** Applies {@link ASCIIFoldingFilter} and removes apostrophes from every emitted token. */
        ACCENT,
        /** Has no effect; used as a placeholder by {@link #tokens(String)}. */
        NON,
        /** Replaces {@code '_'} with spaces before tokenizing. */
        UNDERSCORE,
        /** Inserts a space in front of every run of upper-case letters before tokenizing. */
        JAVA_NAME_CONVENTION,
        /**
         * Like {@link #JAVA_NAME_CONVENTION}, and additionally replaces every run of
         * digits with a single space.
         */
        YAGO
    }

    /**
     * Tokenizes {@code str} with a {@link StandardTokenizer} and applies the requested filters.
     *
     * @param str       text to tokenize; must not be {@code null}
     * @param filterArr filters to apply; see {@link Filter} for ordering rules
     * @return the emitted terms, in stream order
     * @throws IOException if the underlying token stream fails
     */
    public static String[] tokens(String str, Filter... filterArr) throws IOException {
        // Membership is tested linearly: the caller's array is in arbitrary order, so
        // Arrays.binarySearch (which requires a sorted array) could miss a requested filter.
        List<Filter> requested = Arrays.asList(filterArr);

        if (requested.contains(Filter.JAVA_NAME_CONVENTION)) {
            str = spaceOutTokens(str, false);
        }
        if (requested.contains(Filter.YAGO)) {
            str = spaceOutTokens(str, true);
        }
        if (requested.contains(Filter.UNDERSCORE)) {
            str = str.replace("_", " ");
        }
        if (requested.contains(Filter.URI_PATH)) {
            str = str.replace(".", " ").replace("/", " ");
        }

        try (StringReader reader = new StringReader(str)) {
            StandardTokenizer tokenizer = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
            tokenizer.setReader(reader);

            // Token filters are stacked in the order the caller listed them.
            TokenStream chain = tokenizer;
            boolean stripApostrophes = false;
            for (Filter filter : filterArr) {
                if (filter == Filter.ACCENT) {
                    stripApostrophes = true;
                    chain = new ASCIIFoldingFilter(chain);
                } else if (filter == Filter.STOP_WORD) {
                    chain = new StopFilter(chain, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
                } else if (filter == Filter.STEM) {
                    chain = new PorterStemFilter(chain);
                } else if (filter == Filter.LOWER_CASE) {
                    chain = new LowerCaseFilter(chain);
                }
            }

            // Closing the outermost stream also closes the wrapped tokenizer.
            try (TokenStream stream = chain) {
                return drainTerms(stream, stripApostrophes).toArray(new String[0]);
            }
        }
    }

    /**
     * Tokenizes {@code str} with a {@link StandardTokenizer} and no additional filters.
     *
     * @param str text to tokenize; must not be {@code null}
     * @return the emitted terms, in stream order
     * @throws IOException if the underlying token stream fails
     */
    public static String[] tokens(String str) throws IOException {
        return tokens(str, Filter.NON);
    }

    /**
     * Same as {@link #tokens(String, Filter...)}, with the terms joined by single spaces.
     *
     * @param str       text to tokenize; must not be {@code null}
     * @param filterArr filters to apply
     * @return the space-separated terms
     * @throws IOException if the underlying token stream fails
     */
    public static String tokenize(String str, Filter... filterArr) throws IOException {
        return StringUtils.join(tokens(str, filterArr), " ");
    }

    /**
     * Same as {@link #tokens(String, String, Analyzer)}, with the terms joined by single spaces.
     *
     * @param str      field name handed to {@link Analyzer#tokenStream}
     * @param str2     text to analyze; must not be {@code null}
     * @param analyzer analyzer that produces the token stream
     * @return the space-separated terms
     * @throws IOException if the token stream fails
     */
    public static String tokenize(String str, String str2, Analyzer analyzer) throws IOException {
        return StringUtils.join(tokens(str, str2, analyzer), " ");
    }

    /**
     * Runs {@code str2} through {@code analyzer} and returns the emitted terms.
     *
     * @param str      field name handed to {@link Analyzer#tokenStream}
     * @param str2     text to analyze; must not be {@code null}
     * @param analyzer analyzer that produces the token stream
     * @return the emitted terms, in stream order
     * @throws IOException if the token stream fails
     */
    public static String[] tokens(String str, String str2, Analyzer analyzer) throws IOException {
        try (TokenStream stream = analyzer.tokenStream(str, new StringReader(str2))) {
            return drainTerms(stream, false).toArray(new String[0]);
        }
    }

    /**
     * Joins the elements of {@code list}, placing {@code str} between consecutive elements.
     * Despite the method name, nothing is appended after the last element.
     *
     * @param list elements to join
     * @param str  separator placed between elements
     * @return the result of {@code StringUtils.join(list, str)}
     */
    public static String addSuffix(List<String> list, String str) {
        return StringUtils.join(list, str);
    }

    /**
     * Joins the elements of {@code strArr}, placing {@code str} between consecutive elements.
     * Despite the method name, nothing is appended after the last element.
     *
     * @param strArr elements to join
     * @param str    separator placed between elements
     * @return the result of {@code StringUtils.join(strArr, str)}
     */
    public static String addSuffix(String[] strArr, String str) {
        return StringUtils.join(strArr, str);
    }

    /**
     * Returns a new list in which every element of {@code list} is prefixed with {@code str}.
     *
     * @param list elements to prefix; must not be {@code null}
     * @param str  prefix to prepend
     * @return a new list holding {@code str + element} for each element, in order
     */
    public static List<String> addPreffix(List<String> list, String str) {
        List<String> prefixed = new ArrayList<>(list.size());
        for (String element : list) {
            prefixed.add(str + element);
        }
        return prefixed;
    }

    /**
     * Concatenates the elements of {@code list} without any separator.
     *
     * @param list elements to concatenate
     * @return the concatenation
     */
    public static String toString(List<String> list) {
        return addSuffix(list, "");
    }

    /**
     * Removes all whitespace from {@code str} via {@link StringUtils#deleteWhitespace(String)}.
     *
     * @param str text to strip
     * @return {@code str} without whitespace
     */
    public static String removeEmptySpaces(String str) {
        return StringUtils.deleteWhitespace(str);
    }

    /**
     * Manual demo: tokenizes two sample strings and prints each resulting term on its own line.
     *
     * @param strArr unused
     * @throws IOException if tokenization fails
     */
    public static void main(String[] strArr) throws IOException {
        Random random = new Random(100000L);
        String[] first = tokens("politician" + random.nextInt(),
                Filter.LOWER_CASE, Filter.STEM, Filter.STOP_WORD,
                Filter.UNDERSCORE, Filter.JAVA_NAME_CONVENTION, Filter.YAGO);
        for (String term : first) {
            System.out.println(term);
        }
        String[] second = tokens("http://teste.org/Object-oriented_programming'" + random.nextInt(),
                Filter.LOWER_CASE, Filter.STEM, Filter.STOP_WORD,
                Filter.URI_PATH, Filter.UNDERSCORE);
        for (String term : second) {
            System.out.println(term);
        }
    }

    /**
     * Tokenizes {@code text} without filters, inserts a space before every run of
     * upper-case letters in each token (and, when {@code dropDigits} is set, replaces
     * every run of digits with a single space), then joins the tokens with spaces.
     */
    private static String spaceOutTokens(String text, boolean dropDigits) throws IOException {
        List<String> rewritten = new ArrayList<>();
        for (String token : tokens(text)) {
            int length = token.length();
            StringBuilder out = new StringBuilder(length + 4);
            int i = 0;
            while (i < length) {
                char c = token.charAt(i);
                if (Character.isUpperCase(c)) {
                    // A whole run of capitals is kept together behind one space.
                    out.append(' ');
                    while (i < length && Character.isUpperCase(token.charAt(i))) {
                        out.append(token.charAt(i));
                        i++;
                    }
                } else if (dropDigits && Character.isDigit(c)) {
                    // The digits themselves are discarded; only the separating space remains.
                    out.append(' ');
                    while (i < length && Character.isDigit(token.charAt(i))) {
                        i++;
                    }
                } else {
                    out.append(c);
                    i++;
                }
            }
            rewritten.add(out.toString());
        }
        return StringUtils.join(rewritten, " ");
    }

    /**
     * Consumes {@code stream} (reset, iterate, end) and returns the text of each term.
     * The caller remains responsible for closing the stream.
     */
    private static List<String> drainTerms(TokenStream stream, boolean stripApostrophes) throws IOException {
        List<String> terms = new ArrayList<>();
        stream.reset();
        CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
        while (stream.incrementToken()) {
            String text = term.toString();
            if (stripApostrophes) {
                // NOTE(review): the original chained .replace("'s", "") after this call, which could
                // never match once all apostrophes were gone; the output is unchanged without it.
                text = text.replace("'", "");
            }
            terms.add(text);
        }
        // Required by the TokenStream workflow once incrementToken() has returned false.
        stream.end();
        return terms;
    }
}
