package org.dllearner.scripts;

import java.io.File;
import java.net.URI;
import java.net.URLEncoder;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.log4j.FileAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.SimpleLayout;
import org.dllearner.algorithms.ocel.OCEL;
import org.dllearner.core.AbstractKnowledgeSource;
import org.dllearner.core.ComponentManager;
import org.dllearner.kb.extraction.ExtractionAlgorithm;
import org.dllearner.kb.extraction.Manager;
import org.dllearner.kb.sparql.Cache;
import org.dllearner.kb.sparql.SPARQLTasks;
import org.dllearner.kb.sparql.SparqlEndpoint;
import org.dllearner.kb.sparql.SparqlKnowledgeSource;
import org.dllearner.kb.sparql.SparqlQuery;
import org.dllearner.learningproblems.EvaluatedDescriptionPosNeg;
import org.dllearner.learningproblems.PosNegLPStandard;
import org.dllearner.reasoning.FastInstanceChecker;
import org.dllearner.refinementoperators.RhoDRDown;
import org.dllearner.scripts.improveWikipedia.ConceptSPARQLReEvaluator;
import org.dllearner.scripts.improveWikipedia.ConceptSelector;
import org.dllearner.scripts.improveWikipedia.WikipediaCategoryTasks;
import org.dllearner.utilities.Files;
import org.dllearner.utilities.Helper;
import org.dllearner.utilities.datastructures.SetManipulation;
import org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL;
import org.dllearner.utilities.examples.AutomaticPositiveExampleFinderSPARQL;
import org.dllearner.utilities.statistics.SimpleClock;

/* loaded from: input_file:org/dllearner/scripts/WikipediaCategoryCleaner.class */
/**
 * Command-line script that walks over a fixed list of DBpedia category URIs and,
 * for each one, learns class descriptions with OCEL from positive/negative examples
 * obtained through {@link WikipediaCategoryTasks}, then writes the resulting
 * individual lists and concepts as HTML files below {@code wiki/<url-encoded category>/}.
 *
 * <p>All state is static; the class is meant to be started through {@link #main(String[])}.
 */
public class WikipediaCategoryCleaner {
    private static SPARQLTasks sparqlTasks;
    private static Cache cache;
    private static final boolean LOCAL = true;
    private static final boolean DEVELOPSTABLESETS = true;
    // NOTE: the trailing 'a' in this name is kept because the constant is public.
    public static final int SPARQL_RESULTSET_LIMITa = 500;
    public static final int SPARQL_RESULTSET_LIMIT_NEGATIVES = 20;
    public static final int SPARQL_RESULTSET_LIMIT_CONCEPT_REEVALUATE = 500;
    private static final int DEPTH_OF_RDFS = 0;
    private static final Logger logger = Logger.getRootLogger();
    public static double PERCENT_OF_SKOSSET = 1.0d;
    public static double NEGFACTOR = 1.0d;
    public static int MAX_NR_CONCEPTS_TO_BE_EVALUATED = 20;
    public static double ACCURACY_THRESHOLD = 0.0d;
    public static String FILTER_CONCEPTS_BY = "Entity";

    /**
     * Entry point: configures logging and the SPARQL helpers, then processes every
     * category returned by {@link #returnCat()} plus two hard-coded categories.
     *
     * @param strArr command-line arguments (ignored)
     */
    public static void main(String[] strArr) {
        SimpleClock simpleClock = new SimpleClock();
        initLogger();
        setup();
        logger.info("Start");
        TreeSet<String> categories = new TreeSet<String>();
        categories.addAll(returnCat());
        categories.add("http://dbpedia.org/resource/Category:Prime_Ministers_of_the_United_Kingdom");
        categories.add("http://dbpedia.org/resource/Category:Best_Actor_Academy_Award_winners");
        int processed = 0;
        for (String category : categories) {
            System.out.println(category);
            doit(category);
            // Release the components created while learning for this category.
            ComponentManager.getInstance().freeAllComponents();
            System.out.println(processed);
            processed++;
        }
        simpleClock.printAndSet("Finished");
    }

    /**
     * Runs the whole pipeline for one category: builds examples, learns, selects
     * concepts, writes the HTML result files and logs the re-evaluated concepts.
     * Any exception is logged and the category is abandoned; nothing is rethrown.
     *
     * @param str URI of the category to process
     */
    private static void doit(String str) {
        String outputDir = "";
        try {
            try {
                outputDir = "wiki/" + URLEncoder.encode(str, "UTF-8") + "/";
                Files.mkdir(outputDir);
            } catch (Exception e) {
                // Best effort: on failure outputDir may stay "" and files go to the working directory.
                logger.error("could not create output directory for " + str, e);
            }
            TreeSet<String> positives = new TreeSet<String>();
            TreeSet<String> negatives = new TreeSet<String>();
            WikipediaCategoryTasks wikipediaCategoryTasks = new WikipediaCategoryTasks(sparqlTasks);
            ConceptSPARQLReEvaluator conceptSPARQLReEvaluator = new ConceptSPARQLReEvaluator(sparqlTasks, DEPTH_OF_RDFS, 500);
            // NOTE(review): 20 and true presumably correspond to SPARQL_RESULTSET_LIMIT_NEGATIVES
            // and DEVELOPSTABLESETS (the values match) - confirm against the callee's signature.
            wikipediaCategoryTasks.makeInitialExamples(str, PERCENT_OF_SKOSSET, NEGFACTOR, 20, true);
            positives.addAll(wikipediaCategoryTasks.getPosExamples());
            negatives.addAll(wikipediaCategoryTasks.getNegExamples());

            OCEL algorithm = learn(positives, negatives);
            if (algorithm == null) {
                // learn() already logged the cause; there is nothing to evaluate.
                logger.warn("skipping " + str + ": learning algorithm could not be set up");
                return;
            }
            algorithm.start();

            List<EvaluatedDescriptionPosNeg> selectedConcepts = selectConcepts(algorithm.getCurrentlyBestEvaluatedDescriptions(Integer.MAX_VALUE, 0.5d, true));
            writeList(outputDir + "wrongIndividuals.html", wikipediaCategoryTasks.calculateWrongIndividualsAndNewPosEx(selectedConcepts, positives));
            // From here on the positive set is the cleaned one.
            positives.clear();
            positives.addAll(wikipediaCategoryTasks.getCleanedPositiveSet());
            writeList(outputDir + "correctIndividuals.html", positives);

            StringBuilder conceptLines = new StringBuilder();
            for (EvaluatedDescriptionPosNeg concept : selectedConcepts) {
                conceptLines.append(concept).append("\n");
            }
            conceptLines.append(selectedConcepts.size()).append("\n");
            Files.createFile(new File(outputDir + "concepts.html"), conceptLines.toString());

            List<EvaluatedDescriptionPosNeg> reevaluated = conceptSPARQLReEvaluator.reevaluateConceptsByLowestRecall(selectedConcepts, positives);
            try {
                // Only the first two re-evaluated concepts are written, if they exist.
                if (reevaluated.size() > 0) {
                    writeNotCoveredPositives(outputDir + "foundIndividuals1.html", reevaluated.get(0));
                }
                if (reevaluated.size() > 1) {
                    writeNotCoveredPositives(outputDir + "foundIndividuals2.html", reevaluated.get(1));
                }
            } catch (Exception e2) {
                logger.error("could not write found individuals for " + str, e2);
            }
            printEvaluatedDescriptionCollection(5, reevaluated);
        } catch (Exception e3) {
            logger.error("processing of " + str + " failed", e3);
        }
    }

    /**
     * Writes the not-covered positives of one evaluated description as an HTML link list.
     *
     * @param fileName   target file
     * @param description evaluated description whose not-covered positives are written
     */
    // Raw TreeSet: the element type of getNotCoveredPositives() is not imported in this file.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static void writeNotCoveredPositives(String fileName, EvaluatedDescriptionPosNeg description) {
        writeList(fileName, SetManipulation.indToString(new TreeSet(description.getNotCoveredPositives())));
    }

    /**
     * Writes one HTML anchor per entry, followed by a final line with the entry count.
     *
     * @param str        path of the file to create
     * @param collection entries; each is used verbatim as link target and link text
     */
    private static void writeList(String str, Collection<String> collection) {
        StringBuilder html = new StringBuilder();
        for (String entry : collection) {
            html.append("<a href='").append(entry).append("'>").append(entry).append("</a><br>\n");
        }
        html.append(collection.size()).append("\n");
        Files.createFile(new File(str), html.toString());
    }

    /**
     * Logs the full, cleaned and definitely-wrong individual sets of the given tasks object.
     * Currently not called from within this class.
     */
    private static void collectResults(WikipediaCategoryTasks wikipediaCategoryTasks) {
        SetManipulation.printSet("fullpos", wikipediaCategoryTasks.getFullPositiveSet(), logger);
        SetManipulation.printSet("cleanedpos", wikipediaCategoryTasks.getCleanedPositiveSet(), logger);
        SetManipulation.printSet("wrongindividuals", wikipediaCategoryTasks.getDefinitelyWrongIndividuals(), logger);
    }

    /**
     * Filters the learned descriptions through {@link ConceptSelector}, using
     * {@link #FILTER_CONCEPTS_BY} and {@link #MAX_NR_CONCEPTS_TO_BE_EVALUATED}.
     * Logs a warning when nothing is left.
     *
     * @param list descriptions produced by the learning algorithm
     * @return the selected descriptions (possibly empty)
     */
    private static List<EvaluatedDescriptionPosNeg> selectConcepts(List<EvaluatedDescriptionPosNeg> list) {
        List<EvaluatedDescriptionPosNeg> selected = new ConceptSelector().getConceptsNotContainingString(list, FILTER_CONCEPTS_BY, MAX_NR_CONCEPTS_TO_BE_EVALUATED);
        if (selected.isEmpty()) {
            logger.warn("NO GOOD CONCEPTS FOUND");
        }
        return selected;
    }

    /**
     * Builds and initializes the knowledge source, reasoner, learning problem and
     * OCEL algorithm for the given examples.
     *
     * @param sortedSet  positive example URIs
     * @param sortedSet2 negative example URIs
     * @return the initialized algorithm, or {@code null} if any setup step failed
     *         (the failure is logged)
     */
    // Raw collections: the Individual element type and the exact parameter type of the
    // FastInstanceChecker constructor are not visible from this file.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static OCEL learn(SortedSet<String> sortedSet, SortedSet<String> sortedSet2) {
        try {
            TreeSet instances = new TreeSet();
            instances.addAll(SetManipulation.stringToInd(sortedSet));
            instances.addAll(SetManipulation.stringToInd(sortedSet2));
            SparqlKnowledgeSource sparqlKnowledgeSource = new SparqlKnowledgeSource(URI.create("http://dbpedia.org").toURL(), SetManipulation.indToString(instances));
            sparqlKnowledgeSource.setCloseAfterRecursion(true);
            sparqlKnowledgeSource.setRecursionDepth(1);
            sparqlKnowledgeSource.setPredefinedEndpoint("LOCALDBPEDIA");
            sparqlKnowledgeSource.setUseLits(false);
            sparqlKnowledgeSource.setGetAllSuperClasses(true);
            sparqlKnowledgeSource.setPropertyInformation(false);
            sparqlKnowledgeSource.setCacheDir(Cache.getPersistantCacheDir());
            sparqlKnowledgeSource.setPredefinedFilter("YAGOONLY");

            HashSet sources = new HashSet();
            sources.add(sparqlKnowledgeSource);
            FastInstanceChecker fastInstanceChecker = new FastInstanceChecker(sources);
            fastInstanceChecker.setDefaultNegation(false);

            PosNegLPStandard posNegLPStandard = new PosNegLPStandard(fastInstanceChecker, Helper.getIndividualSet(sortedSet), Helper.getIndividualSet(sortedSet2));

            OCEL ocel = (OCEL) ComponentManager.getInstance().learningAlgorithm(OCEL.class, posNegLPStandard, fastInstanceChecker);
            ocel.setNoisePercentage(20.0d);
            ocel.setGuaranteeXgoodDescriptions(100);
            ocel.setMaxExecutionTimeInSeconds(50);
            RhoDRDown operator = ocel.getOperator();
            operator.setUseAllConstructor(false);
            operator.setUseBooleanDatatypes(false);
            operator.setUseCardinalityRestrictions(false);
            operator.setUseNegation(false);
            operator.setUseHasValueConstructor(false);
            operator.setUseDoubleDatatypes(false);
            ocel.setWriteSearchTree(true);
            ocel.setReplaceSearchTree(true);

            // Components are initialized in dependency order: source, reasoner, problem, algorithm.
            sparqlKnowledgeSource.init();
            fastInstanceChecker.init();
            posNegLPStandard.init();
            ocel.init();
            return ocel;
        } catch (Exception e) {
            logger.warn("error in sparqlprepare", e);
            // Never hand a partially initialized algorithm back to the caller.
            return null;
        }
    }

    /**
     * Logs (at debug level) up to {@code i} evaluated descriptions: the concept, its
     * accuracy, its score, and which of its not-covered positives are not already
     * among the not-covered positives of the first description.
     *
     * @param i          maximum number of descriptions to log; nothing is logged if {@code i <= 0}
     * @param collection descriptions in iteration order; {@code null} is treated as empty
     */
    public static void printEvaluatedDescriptionCollection(int i, Collection<EvaluatedDescriptionPosNeg> collection) {
        if (collection == null) {
            return;
        }
        int printed = 0;
        Set<?> firstNotCovered = null;
        Set<Object> comparedToFirst = new HashSet<Object>();
        for (EvaluatedDescriptionPosNeg description : collection) {
            if (printed >= i) {
                return;
            }
            if (printed == 0) {
                firstNotCovered = description.getNotCoveredPositives();
            }
            printed++;
            comparedToFirst.addAll(description.getNotCoveredPositives());
            if (firstNotCovered != null) {
                comparedToFirst.removeAll(firstNotCovered);
            }
            logger.debug("*************************");
            logger.debug("Concept: " + description);
            logger.debug("accuracy: " + description.getAccuracy());
            logger.debug("Not Covered compared to First: " + comparedToFirst);
            logger.debug(description.getScore());
            comparedToFirst.clear();
        }
    }

    /**
     * Logs the three individual sets and the number of re-evaluated concepts.
     * Currently not called from within this class.
     */
    private static void printIntermediateResults(SortedSet<String> sortedSet, SortedSet<String> sortedSet2, SortedSet<String> sortedSet3, int i) {
        SetManipulation.printSet("full  Individual set: ", sortedSet, logger);
        SetManipulation.printSet("correct Individuals: ", sortedSet2, logger);
        SetManipulation.printSet("incorrect Individuals: ", sortedSet3, logger);
        logger.info("reevaluated " + i + " found Concepts");
        logger.info("END OF PHASE 1 **********************");
    }

    /** Creates the persistent cache and the SPARQL task helper for the local DBpedia endpoint. */
    private static void setup() {
        cache = Cache.getPersistentCache();
        sparqlTasks = new SPARQLTasks(cache, SparqlEndpoint.getEndpointLOCALDBpedia());
    }

    /**
     * Replaces all root-logger appenders with a file appender writing to
     * {@code log/progress/skos<time>.txt} and sets per-class log levels.
     */
    private static void initLogger() {
        SimpleLayout simpleLayout = new SimpleLayout();
        FileAppender fileAppender = null;
        try {
            fileAppender = new FileAppender(simpleLayout, "log/progress/skos" + ConceptSelector.time() + ".txt", false);
        } catch (Exception e) {
            // The logger has no appender yet, so stderr is the only place to report this.
            e.printStackTrace();
        }
        logger.removeAllAppenders();
        if (fileAppender != null) {
            logger.addAppender(fileAppender);
        }
        logger.setLevel(Level.DEBUG);
        Logger.getLogger(Manager.class).setLevel(Level.INFO);
        Level level = Level.WARN;
        Logger.getLogger(AbstractKnowledgeSource.class).setLevel(level);
        Logger.getLogger(SparqlKnowledgeSource.class).setLevel(level);
        Logger.getLogger(ExtractionAlgorithm.class).setLevel(level);
        Logger.getLogger(AutomaticNegativeExampleFinderSPARQL.class).setLevel(level);
        Logger.getLogger(AutomaticPositiveExampleFinderSPARQL.class).setLevel(level);
        Logger.getLogger(OCEL.class).setLevel(level);
        Logger.getLogger(SparqlQuery.class).setLevel(level);
        Logger.getLogger(Cache.class).setLevel(level);
    }

    /**
     * Queries all SKOS subject categories, collects those with more than 40 and fewer
     * than 80 subjects, prints them as {@code cat.add("...");} lines and terminates
     * the JVM. Currently not called from within this class.
     */
    private static void findCat() {
        SortedSet<String> allCategories = sparqlTasks.queryAsSet("SELECT DISTINCT ?cat WHERE { ?a <http://www.w3.org/2004/02/skos/core#subject> ?cat  }", "cat");
        TreeSet<String> selected = new TreeSet<String>();
        int visited = 0;
        for (String category : allCategories) {
            System.out.println("" + visited + " " + selected.size());
            visited++;
            SortedSet<String> subjects = sparqlTasks.queryAsSet("SELECT DISTINCT ?subject WHERE { ?subject <http://www.w3.org/2004/02/skos/core#subject> <" + category + ">  }", "subject");
            if (40 < subjects.size() && subjects.size() < 80) {
                selected.add(category);
            }
            // Stop once enough categories were found or enough were inspected.
            if (selected.size() > 200 || visited > 970) {
                for (String hit : selected) {
                    System.out.println("cat.add(\"" + hit + "\");");
                }
                System.exit(0);
            }
        }
        System.exit(0);
    }

    /**
     * Returns the fixed list of category URIs processed by {@link #main(String[])}.
     *
     * @return a new, mutable sorted set of category URIs
     */
    private static SortedSet<String> returnCat() {
        TreeSet<String> categories = new TreeSet<String>();
        categories.add("http://dbpedia.org/resource/Category:Al-Qaeda_activities");
        categories.add("http://dbpedia.org/resource/Category:Assassinated_monarchs");
        categories.add("http://dbpedia.org/resource/Category:Companies_of_Finland");
        categories.add("http://dbpedia.org/resource/Category:Fluorescent_dyes");
        categories.add("http://dbpedia.org/resource/Category:Irish_folk_songs");
        categories.add("http://dbpedia.org/resource/Category:Islands_of_Tonga");
        categories.add("http://dbpedia.org/resource/Category:Concurrent_programming_languages");
        categories.add("http://dbpedia.org/resource/Category:Nuremberg");
        categories.add("http://dbpedia.org/resource/Category:Satirical_magazines");
        return categories;
    }
}
