package org.aksw.autosparql.tbsl.algorithm.ltag.parser;

import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.aksw.autosparql.commons.nlp.ner.DBpediaSpotlightNER;
import org.apache.log4j.Logger;

/* loaded from: input_file:org/aksw/autosparql/tbsl/algorithm/ltag/parser/Preprocessor.class */
/**
 * Text preprocessing helpers: plain-string normalization, rewriting of POS-tagged
 * sentences (whitespace-separated {@code token/TAG} items) into condensed tags, and
 * re-tagging of named entities as {@code NNP}.
 *
 * <p>NOTE: {@link #USE_NER}, {@link #VERBOSE} and {@link #ner} are static, so every
 * constructor call overwrites them for all instances of this class.
 */
public class Preprocessor {
    private static final Logger logger = Logger.getLogger(Preprocessor.class);
    static final String[] genericReplacements = {"[!?.,;]", ""};
    static final String[] englishReplacements = {"don't", "do not", "doesn't", "does not", ",\\s", " and ", " and but ", " but "};
    static final String[] hackReplacements = {" 1 ", " one ", " 2 ", " two ", " 3 ", " three ", " 4 ", " four ", " 5 ", " five ", " 6 ", " six ", " 7 ", " seven ", " 8 ", " eight ", " 9 ", " nine ", " 10 ", " ten ", " 11 ", " eleven ", " 12 ", " twelve ", " 13 ", " thirteen ", " 14 ", " fourteen ", " 15 ", " fifteen ", " 16 ", " sixteen ", " 17 ", " seventeen ", " 18 ", " eighteen ", " 19 ", " nineteen ", " 20 ", " twenty "};
    static boolean USE_NER;
    static boolean VERBOSE;
    public static DBpediaSpotlightNER ner;
    public List<String> usefulnamedentities;

    /** Combining marks left behind by NFD decomposition. */
    private static final Pattern DIACRITICS = Pattern.compile("[\\p{InCombiningDiacriticalMarks}]");

    // ---- Rules used by condense(), listed in the order they are applied. ----
    private static final Pattern RBR_JJ = Pattern.compile("(\\w+/RBR.(\\w+)/JJ)");
    private static final Pattern HOW_MANY = Pattern.compile("(how/WRB.many/JJ)");
    private static final Pattern WRB_JJ = Pattern.compile("(\\w+/WRB.(\\w+)(?<!many)/JJ)");
    private static final Pattern THE_SAME_NN_AS = Pattern.compile("(the/DT.same/JJ.(\\w+)/NN.as/IN)");
    private static final Pattern NN_OF = Pattern.compile("\\s((\\w+)/NNS?.of/IN)");
    private static final Pattern DO_AUXILIARY = Pattern.compile("(?i)(\\s((did)|(do)|(does))/VB.?)\\s");
    private static final Pattern LEADING_PREP_WDT = Pattern.compile("(\\A\\w+/((TO)|(IN)).)\\w+/WDT");
    private static final Pattern HAVE_BEEN_VBN_BY = Pattern.compile("(((has)|(have)|(had))/VB[A-Z]?.been/VBN.(\\w+)/VBN.by/IN)");
    private static final Pattern HAVE_X_BEEN_VERB = Pattern.compile("(\\s((has)|(have)|(had))/VB[A-Z]?(.+\\s)been/VBN\\s(\\w+)/VB(N|D))");
    private static final Pattern BE_VBN_BY = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.by/IN)");
    private static final Pattern BE_VBN_PREP = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.\\w+/((TO)|(IN)))");
    private static final Pattern BE_X_VERB_PREP = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(.+)\\s(\\w+)/VB(N|D).\\w+/((TO)|(IN)))");
    private static final Pattern BE_THERE = Pattern.compile("((is)|(are))/(VB[A-Z]?).there/(RB)");
    private static final Pattern BE_X_VERB = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.((.+)\\s\\w+)/VB(N|D))");
    private static final Pattern VBN_BY = Pattern.compile("\\s((\\w+)/VBN.by/IN)");
    private static final Pattern VBD_VBN = Pattern.compile("\\s(\\w+/VBD.(\\w+)/VBN)");
    private static final Pattern VPASS_IN = Pattern.compile("\\s((\\w+)/VPASS.\\w+/IN)");
    private static final Pattern PARTICIPLE_IN = Pattern.compile("\\s((\\w+)/((VBG)|(VBN)).\\w+/IN)");
    private static final Pattern VERB_PREP = Pattern.compile("\\s((\\w+)(?<!have)/V[A-Z]+\\s\\w+/(IN|TO))");
    private static final Pattern WHEN_VERB = Pattern.compile("\\A(when/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))");
    private static final Pattern WHERE_VERB = Pattern.compile("\\A(where/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))");
    private static final Pattern JJ_JJ = Pattern.compile("((\\w+)/JJ.(\\w+)/JJ)");
    private static final Pattern JJ_NN = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NN(S)?(\\s|\\z))");
    private static final Pattern JJ_NPREP = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NPREP)");

    // ---- Rules used by condenseNominals(). ----
    private static final Pattern QUOTED_FIRST_TAG = Pattern.compile("``/``(\\s)?(\\w+(/\\w+\\s)).*''/''");
    private static final Pattern QUOTED_JOINED = Pattern.compile("(``/``((.*)_)''/'')");
    private static final Pattern NNP_NNP = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?(\\W|$)");
    private static final Pattern NN_NN = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?(\\W|$)");
    private static final Pattern NNP_NN = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+)/NN[S]?)(\\W|$)");

    // ---- Used by findNEs(). ----
    private static final Pattern HAS_UPPERCASE = Pattern.compile(".*[A-Z].*");
    /** Tag names that must not be accepted as named entities even if the recognizer proposes them. */
    private static final List<String> TAG_NAMES = Arrays.asList("NN", "NNS", "NNP", "NNPS", "NPREP", "JJ", "JJR", "JJS", "JJH", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "PASSIVE", "PASSPART", "VPASS", "VPASSIN", "GERUNDIN", "VPREP", "WHEN", "WHERE", "IN", "TO", "DT");

    /**
     * Creates a preprocessor and (re)sets the static configuration.
     *
     * @param z whether named entity recognition is enabled; when {@code true} a new
     *          {@link DBpediaSpotlightNER} is created and stored in {@link #ner}
     */
    public Preprocessor(boolean z) {
        USE_NER = z;
        VERBOSE = true;
        if (USE_NER) {
            ner = new DBpediaSpotlightNER();
        }
    }

    /** Turns debug logging of the rewrite steps on or off (static, affects all instances). */
    public void setVERBOSE(boolean z) {
        VERBOSE = z;
    }

    /**
     * Strips diacritics via {@link #ascii(String)} and then applies the built-in
     * replacement tables via {@link #replacements(String, String...)}.
     */
    public String normalize(String str) {
        return replacements(ascii(str));
    }

    /**
     * Decomposes the string (NFD), drops all combining diacritical marks and replaces
     * {@code ß} by {@code ss}.
     */
    public String ascii(String str) {
        String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
        return DIACRITICS.matcher(decomposed).replaceAll("").replace("ß", "ss");
    }

    /**
     * Applies regex replacements to {@code str}. Replacements are given as flat
     * {@code regex, replacement} pairs and are applied in this order: the caller's pairs,
     * {@link #englishReplacements}, {@link #genericReplacements}, {@link #hackReplacements}.
     *
     * @param str    the text to rewrite
     * @param strArr additional {@code regex, replacement} pairs applied first
     * @return the rewritten text
     * @throws IllegalArgumentException if any of the pair arrays has an odd length
     */
    public String replacements(String str, String... strArr) {
        if (strArr.length % 2 != 0 || genericReplacements.length % 2 != 0
                || englishReplacements.length % 2 != 0 || hackReplacements.length % 2 != 0) {
            throw new IllegalArgumentException("replacements must be given as (regex, replacement) pairs");
        }
        List<String> pairs = new ArrayList<String>();
        pairs.addAll(Arrays.asList(strArr));
        pairs.addAll(Arrays.asList(englishReplacements));
        pairs.addAll(Arrays.asList(genericReplacements));
        pairs.addAll(Arrays.asList(hackReplacements));
        String result = str;
        for (int i = 0; i < pairs.size(); i += 2) {
            result = result.replaceAll(pairs.get(i), pairs.get(i + 1));
        }
        return result;
    }

    /**
     * Lower-cases, inside {@code str}, every occurrence of each space-separated token of
     * the normalized {@code str2}.
     *
     * <p>Matching is by substring, so a short token also lower-cases its occurrences
     * inside longer words of {@code str}.
     *
     * @param str  the text to modify
     * @param str2 the text whose tokens (after {@link #replacements(String, String...)})
     *             are lower-cased in {@code str}
     */
    public String lowercase(String str, String str2) {
        String result = str;
        for (String token : replacements(str2).split(" ")) {
            // Locale.ROOT keeps the result independent of the JVM default locale
            // (e.g. Turkish dotless i).
            result = result.replace(token, token.toLowerCase(Locale.ROOT));
        }
        return result;
    }

    /**
     * Rewrites a POS-tagged sentence by applying a fixed sequence of rules that merge or
     * re-tag adjacent {@code token/TAG} items (for example {@code how/WRB many/JJ} becomes
     * {@code how/WLEX many/WLEX}). Quote tokens and one round of double spaces are removed
     * first.
     *
     * <p>Each rule is matched against the text as it stood when the rule started; for
     * every match, the first literal occurrence of the matched span in the current text
     * is replaced. The matched span is never interpreted as a regular expression, so
     * tokens containing regex metacharacters or {@code $} are handled safely.
     *
     * @param str the tagged sentence
     * @return the condensed tagged sentence
     */
    public String condense(String str) {
        String s = str.replace("``/``", "").replace("''/''", "").replace("  ", " ");
        Matcher m;

        m = RBR_JJ.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "/JJR");
        }
        m = HOW_MANY.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), "how/WLEX many/WLEX");
        }
        m = WRB_JJ.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "/JJH");
        }
        m = THE_SAME_NN_AS.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "/NNSAME");
        }
        m = NN_OF.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "/NPREP");
        }
        // The next two rules delete the matched span; the log shows an empty quoted string.
        m = DO_AUXILIARY.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), "", "\"\"");
        }
        m = LEADING_PREP_WDT.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), "", "\"\"");
        }
        m = HAVE_BEEN_VBN_BY.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(6) + "/PASSIVE");
        }
        m = HAVE_X_BEEN_VERB.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(6) + m.group(7) + "/PASSIVE");
        }
        m = BE_VBN_BY.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(7) + "/PASSIVE");
        }
        m = BE_VBN_PREP.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(7) + "/VPREP");
        }
        m = BE_X_VERB_PREP.matcher(s);
        while (m.find()) {
            // The log message has historically omitted the space that the replacement inserts.
            s = rewrite(s, m.group(1), m.group(7) + " " + m.group(8) + "/VPREP",
                    m.group(7) + m.group(8) + "/VPREP");
        }
        m = BE_THERE.matcher(s);
        while (m.find()) {
            // NOTE(review): this replaces the first occurrence of the tag text anywhere in the
            // sentence, so "RB" may hit an earlier token such as ".../WRB" - confirm intent.
            s = replaceFirstLiteral(s, m.group(4), "LEX");
            s = replaceFirstLiteral(s, m.group(5), "LEX");
        }
        m = BE_X_VERB.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(7) + "/PASSIVE");
        }
        m = VBN_BY.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "/PASSPART");
        }
        m = VBD_VBN.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "/VPASS");
        }
        m = VPASS_IN.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "/VPASSIN");
        }
        m = PARTICIPLE_IN.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "/GERUNDIN");
        }
        m = VERB_PREP.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "/VPREP");
        }
        m = WHEN_VERB.matcher(s);
        while (m.find()) {
            // A verb already condensed to VPREP keeps its preposition marker.
            String tag = m.group(4).equals("VPREP") ? "/WHENPREP" : "/WHEN";
            s = rewrite(s, m.group(1), m.group(2) + m.group(3) + tag);
        }
        m = WHERE_VERB.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + m.group(3) + "/WHERE");
        }
        m = JJ_JJ.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "_" + m.group(3) + "/JJ");
        }
        m = JJ_NN.matcher(s);
        while (m.find()) {
            // The matched span includes the trailing separator, so the replacement re-adds a
            // space; the log message shows the tag without it.
            String joined = m.group(2) + "_" + m.group(3) + "/JJNN";
            s = rewrite(s, m.group(1), joined + " ", joined);
        }
        m = JJ_NPREP.matcher(s);
        while (m.find()) {
            s = rewrite(s, m.group(1), m.group(2) + "_" + m.group(3) + "/NPREP");
        }
        return s;
    }

    /**
     * Joins adjacent nominal tokens of a POS-tagged sentence with underscores: tokens
     * between quote tokens become a single {@code NNP}, then NNP+NNP, NN+NN and NNP+NN
     * pairs are merged repeatedly until no pair is left.
     *
     * @param str the tagged sentence
     * @return the tagged sentence with joined nominals
     */
    public String condenseNominals(String str) {
        String s = str;

        // Inside quotes: turn each "/TAG " separator into "_", one per pass.
        Matcher m = QUOTED_FIRST_TAG.matcher(s);
        while (m.find()) {
            s = replaceFirstLiteral(s, m.group(3), "_");
            m = QUOTED_FIRST_TAG.matcher(s);
        }
        // Then replace the dangling trailing "_" of the quoted phrase by an NNP tag.
        m = QUOTED_JOINED.matcher(s);
        while (m.find()) {
            s = replaceFirstLiteral(s, m.group(2), m.group(3) + "/NNP");
        }

        // Each loop below re-matches after every replacement and removes one '/' per pass,
        // so it terminates. (The original ran the NNP+NNP loop twice; the second run could
        // never find a match and has been dropped.)
        m = NNP_NNP.matcher(s);
        while (m.find()) {
            s = replaceFirstLiteral(s, m.group(1), m.group(2) + "_" + m.group(3));
            m = NNP_NNP.matcher(s);
        }
        m = NN_NN.matcher(s);
        while (m.find()) {
            s = replaceFirstLiteral(s, m.group(1), m.group(2) + "_" + m.group(3));
            m = NN_NN.matcher(s);
        }
        m = NNP_NN.matcher(s);
        while (m.find()) {
            // NOTE(review): group(4) (the separator after the pair) is not part of the replaced
            // span, so appending it duplicates that character - kept as-is, confirm intent.
            s = replaceFirstLiteral(s, m.group(1), m.group(2) + "_" + m.group(3) + "/NNP" + m.group(4));
            m = NNP_NN.matcher(s);
        }
        return s;
    }

    /**
     * Asks the named entity recognizer for entities in {@code str2}, keeps the useful ones
     * in {@link #usefulnamedentities}, and re-tags every token of those entities in the
     * tagged sentence {@code str} as {@code NNP}.
     *
     * <p>An entity is kept when it contains an upper-case ASCII letter, is not itself a
     * tag name, and is not a substring of another, different proposed entity.
     *
     * @param str  the POS-tagged sentence to re-tag
     * @param str2 the text handed to the recognizer
     * @return {@code str} with entity tokens tagged {@code NNP}
     * @throws IllegalStateException if no recognizer is available (NER was not enabled)
     */
    public String findNEs(String str, String str2) {
        if (ner == null) {
            throw new IllegalStateException(
                    "No named entity recognizer available; create the Preprocessor with NER enabled");
        }
        List<String> proposed = ner.getNamedEntitites(str2);
        this.usefulnamedentities = new ArrayList<String>();
        if (VERBOSE) {
            logger.debug("Proposed NEs: " + proposed);
        }
        for (String candidate : proposed) {
            if (!HAS_UPPERCASE.matcher(candidate).matches() || TAG_NAMES.contains(candidate)) {
                continue;
            }
            boolean maximal = true;
            for (String other : proposed) {
                if (!other.equals(candidate) && other.contains(candidate)) {
                    maximal = false;
                    break;
                }
            }
            if (maximal) {
                this.usefulnamedentities.add(candidate);
            }
        }
        if (VERBOSE) {
            logger.debug("Accepted NEs: " + this.usefulnamedentities);
        }

        String tagged = str;
        for (String entity : this.usefulnamedentities) {
            for (String token : entity.split(" ")) {
                if (token.isEmpty()) {
                    // Consecutive spaces in an entity; an empty token would re-tag everything.
                    continue;
                }
                // The token is quoted so entity text is never read as a regex, and the tag is
                // replaced at the match position so repeated tokens are not corrupted.
                // No word boundary is required before the token: it may follow '_' in a
                // previously joined nominal.
                tagged = Pattern.compile(Pattern.quote(token) + "/[A-Z]+")
                        .matcher(tagged)
                        .replaceAll(Matcher.quoteReplacement(token + "/NNP"));
            }
        }
        return tagged;
    }

    /** Logs and performs one rewrite step whose log text equals the replacement. */
    private static String rewrite(String text, String matched, String replacement) {
        return rewrite(text, matched, replacement, replacement);
    }

    /** Logs one rewrite step (showing {@code shown}) and replaces {@code matched} literally. */
    private static String rewrite(String text, String matched, String replacement, String shown) {
        if (VERBOSE) {
            logger.debug("Replacing " + matched + " by " + shown);
        }
        return replaceFirstLiteral(text, matched, replacement);
    }

    /** Replaces the first literal occurrence of {@code target}; returns {@code text} if absent. */
    private static String replaceFirstLiteral(String text, String target, String replacement) {
        int at = text.indexOf(target);
        if (at < 0) {
            return text;
        }
        return text.substring(0, at) + replacement + text.substring(at + target.length());
    }
}
