package org.aksw.qa.commons.qald;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactoryConfigurationError;
import org.aksw.qa.commons.datastructure.IQuestion;
import org.aksw.qa.commons.load.Dataset;
import org.aksw.qa.commons.load.LoaderController;
import org.aksw.qa.commons.load.json.EJDataset;
import org.aksw.qa.commons.load.json.EJQuestionFactory;
import org.aksw.qa.commons.load.json.ExtendedJson;
import org.aksw.qa.commons.load.json.ExtendedQALDJSONLoader;
import org.aksw.qa.commons.load.json.QaldJson;
import org.aksw.qa.commons.sparql.SPARQLEndpoints;
import org.aksw.qa.commons.sparql.ThreadedSPARQL;
import org.aksw.qa.commons.store.StoreQALDXML;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.atlas.lib.Chars;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QueryParseException;
import org.apache.jena.rdf.model.RDFNode;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;

/* loaded from: input_file:BOOT-INF/lib/commons-0.4.16.jar:org/aksw/qa/commons/qald/Qald7CreationTool.class */
/**
 * Builds QALD-7 datasets out of previously published QALD datasets.
 *
 * <p>Questions are loaded through {@link LoaderController}, annotated with {@link Fail} flags
 * describing what is wrong with them (missing languages, keywords, answer type or SPARQL query,
 * answers that differ from what the SPARQL endpoint returns, ...), de-duplicated by their English
 * question text and finally written out as QALD JSON, extended JSON and QALD XML. Optionally a
 * plain-text report about the flagged questions is produced.
 *
 * <p>An instance owns a {@link ThreadedSPARQL} helper; call {@link #destroy()} when done.
 */
public class Qald7CreationTool {
    private static final String DBO_URI = "http://dbpedia.org/ontology/";
    private static final String RES_URI = "http://dbpedia.org/resource/";

    /** Horizontal rule that frames every question entry in the file report. */
    private static final String REPORT_RULE = "_____________________________________________________";

    /** Multilingual train and test sets of QALD 3 to 6 that are merged into the QALD-7 train set. */
    public static final Set<Dataset> MULTILINGUAL_TRAIN_TEST_SETS = ImmutableSet.of(
            Dataset.QALD3_Test_dbpedia,
            Dataset.QALD4_Test_Multilingual,
            Dataset.QALD5_Test_Multilingual,
            Dataset.QALD6_Test_Multilingual,
            Dataset.QALD3_Train_dbpedia,
            Dataset.QALD4_Train_Multilingual,
            Dataset.QALD5_Train_Multilingual,
            Dataset.QALD6_Train_Multilingual);

    /** Hybrid train and test sets of QALD 4 to 6. */
    public static final Set<Dataset> HYBRID_SETS = ImmutableSet.of(
            Dataset.QALD4_Test_Hybrid,
            Dataset.QALD4_Train_Hybrid,
            Dataset.QALD5_Test_Hybrid,
            Dataset.QALD5_Train_Hybrid,
            Dataset.QALD6_Test_Hybrid,
            Dataset.QALD6_Train_Hybrid);

    /** Number of questions dropped as duplicates by the most recent de-duplication run. */
    private int duplicate;

    private final ThreadedSPARQL sparql;

    /** Not read or modified anywhere in this class; kept because it is visible to the package. */
    int badQuestionCounter;

    /**
     * Creates a tool that queries {@link SPARQLEndpoints#DBPEDIA_ORG}, passing {@code 90} as the
     * first {@link ThreadedSPARQL} constructor argument.
     */
    public Qald7CreationTool() {
        this.duplicate = 0;
        this.badQuestionCounter = 0;
        this.sparql = new ThreadedSPARQL(90, SPARQLEndpoints.DBPEDIA_ORG);
    }

    /**
     * Creates a tool that queries a custom SPARQL endpoint.
     *
     * @param sparqlEndpoint endpoint handed to {@link ThreadedSPARQL} as its second argument
     * @param timeout value handed to {@link ThreadedSPARQL} as its first argument.
     *     NOTE(review): assumed to be a query timeout; confirm against ThreadedSPARQL.
     */
    public Qald7CreationTool(String sparqlEndpoint, int timeout) {
        this.duplicate = 0;
        this.badQuestionCounter = 0;
        this.sparql = new ThreadedSPARQL(timeout, sparqlEndpoint);
    }

    /**
     * Loads all questions of the given datasets and marks each of them as hybrid.
     *
     * @param datasets datasets to load
     * @return the loaded questions, each with its hybrid flag set to {@code true}
     */
    public Set<Qald7Question> getQald7HybridQuestions(Set<Dataset> datasets) {
        Set<Qald7Question> hybridQuestions = new HashSet<>();
        for (Dataset dataset : datasets) {
            for (Qald7Question question : Qald7QuestionFactory.createInstances(LoaderController.load(dataset))) {
                question.setHybrid(true);
                hybridQuestions.add(question);
            }
        }
        return hybridQuestions;
    }

    /**
     * Loads the hybrid questions of the given datasets and writes them out as a QALD-7 dataset.
     *
     * @param datasets datasets to load
     * @param pathPrefix prefix prepended to every generated file name
     * @param datasetName id of the new dataset; also part of the generated file names
     */
    public void createQald7HybridDataset(Set<Dataset> datasets, String pathPrefix, String datasetName) {
        createQald7Dataset(getQald7HybridQuestions(datasets), pathPrefix, datasetName);
    }

    /**
     * Loads, checks and de-duplicates the given datasets and writes every question without any
     * {@link Fail} flag out as a QALD-7 dataset.
     *
     * @param datasets datasets to load
     * @param createReport whether to additionally write {@code pathPrefix + "BadQuestionsfileReport.txt"}
     * @param autocorrectOnlydbo whether a wrong onlydbo flag is silently corrected instead of flagged
     * @param pathPrefix prefix prepended to every generated file name
     * @param datasetName id of the new dataset; also part of the generated file names
     */
    public void createQald7MultilingualTrainDataset(Set<Dataset> datasets, boolean createReport,
            boolean autocorrectOnlydbo, String pathPrefix, String datasetName) {
        Set<Qald7Question> annotated = loadAndAnnotateTrain(datasets, autocorrectOnlydbo);
        createQald7Dataset(extractGoodTrainQuestionsFromAnnotated(annotated), pathPrefix, datasetName);
        if (createReport) {
            createFileReport(annotated, pathPrefix + "BadQuestionsfileReport.txt", new HashSet<Fail>());
        }
    }

    /**
     * Loads, checks and de-duplicates the given datasets.
     *
     * @param datasets datasets to load
     * @param autocorrectOnlydbo whether a wrong onlydbo flag is silently corrected instead of flagged
     * @return all questions that did not receive any {@link Fail} flag
     */
    public Set<Qald7Question> getQald7MultilingualTrainQuestions(Set<Dataset> datasets, boolean autocorrectOnlydbo) {
        return extractGoodTrainQuestionsFromAnnotated(loadAndAnnotateTrain(datasets, autocorrectOnlydbo));
    }

    /**
     * Loads all questions of the given datasets, attaches the {@link Fail} flags that apply to each
     * question, stores the answers returned by the SPARQL endpoint on it and removes duplicates.
     *
     * @param datasets datasets to load
     * @param autocorrectOnlydbo whether a wrong onlydbo flag is silently corrected instead of flagged
     * @return the annotated questions; of several questions sharing the same English text only the
     *     one with the fewest flags is kept
     */
    public Set<Qald7Question> loadAndAnnotateTrain(Set<Dataset> datasets, boolean autocorrectOnlydbo) {
        List<Qald7Question> annotated = new ArrayList<>();
        for (Dataset dataset : datasets) {
            for (Qald7Question question : Qald7QuestionFactory.createInstances(LoaderController.load(dataset))) {
                question.setFromDataset(dataset);
                question.setFails(new HashSet<Fail>());
                annotateMetadata(question);
                annotateSparql(question, autocorrectOnlydbo);
                annotateAnswers(question);
                annotated.add(question);
            }
        }
        return findAndSelectBestDuplicate(annotated);
    }

    /** Flags missing translations, missing keywords and a missing answer type. */
    private void annotateMetadata(Qald7Question question) {
        if (!checkAtleastSixLanguages(question)) {
            question.addFail(Fail.MISSING_LANGUAGES);
        }
        if (!checkKeywordsPresent(question)) {
            question.addFail(Fail.MISSING_KEYWORDS);
        }
        if (!checkAnswertypeSet(question)) {
            question.addFail(Fail.ANSWERTYPE_NOT_SET);
        }
    }

    /** Flags a missing or unparsable SPARQL query and verifies (or corrects) the onlydbo flag. */
    private void annotateSparql(Qald7Question question, boolean autocorrectOnlydbo) {
        if (!checkSparqlPresent(question)) {
            question.addFail(Fail.SPARQL_MISSING);
            return;
        }
        try {
            Boolean actualOnlydbo = Boolean.valueOf(checkIsOnlydbo(question.getSparqlQuery()));
            // equals() also covers a question whose onlydbo flag was never set (null): that is
            // handled like a wrong flag instead of aborting the whole run with a NullPointerException.
            if (!actualOnlydbo.equals(question.getOnlydbo())) {
                if (autocorrectOnlydbo) {
                    question.setOnlydbo(actualOnlydbo);
                } else {
                    question.addFail(Fail.ISONLYDBO_WRONG);
                }
            }
        } catch (QueryParseException e) {
            question.addFail(Fail.SPARQL_PARSE_ERROR);
        }
    }

    /** Runs the query against the endpoint, stores the result and compares it with the gold answers. */
    private void annotateAnswers(Qald7Question question) {
        Set<String> serverAnswers = Collections.emptySet();
        try {
            serverAnswers = getAnswersFromServer(question);
        } catch (ExecutionException e) {
            question.addFail(Fail.SPARQL_NOT_EXECUTABLE);
        }
        question.setServerAnswers(serverAnswers);

        Set<String> goldenAnswers = question.getGoldenAnswers();
        if (goldenAnswers == null) {
            goldenAnswers = Collections.emptySet();
        }
        if (goldenAnswers.isEmpty()) {
            question.addFail(Fail.NO_ANSWERS_IN_DATASET);
        }
        if (!goldenAnswers.containsAll(serverAnswers) || !serverAnswers.containsAll(goldenAnswers)) {
            question.addFail(Fail.ANSWERSET_DIFFERS);
        }
    }

    private boolean checkSparqlPresent(IQuestion question) {
        return !StringUtils.isEmpty(question.getSparqlQuery());
    }

    private boolean checkAnswertypeSet(IQuestion question) {
        return !Strings.isNullOrEmpty(question.getAnswerType());
    }

    /** Returns whether every language that has a question text also has a non-empty keyword entry. */
    private boolean checkKeywordsPresent(IQuestion question) {
        for (String language : question.getLanguageToQuestion().keySet()) {
            if (question.getLanguageToKeywords().get(language) == null
                    || question.getLanguageToKeywords().get(language).isEmpty()) {
                return false;
            }
        }
        return true;
    }

    private boolean checkAtleastSixLanguages(IQuestion question) {
        return question.getLanguageToQuestion().size() >= 6;
    }

    /**
     * Executes the SPARQL query of the given question and returns the result as strings.
     *
     * @param question question whose SPARQL query is executed
     * @return the URI of every resource and the string value of every other node in the result;
     *     empty if the endpoint helper returned {@code null}
     * @throws ExecutionException if the SPARQL helper fails to execute the query
     */
    public Set<String> getAnswersFromServer(IQuestion question) throws ExecutionException {
        Set<RDFNode> nodes = this.sparql.sparql(question.getSparqlQuery());
        Set<String> answers = new HashSet<>();
        if (nodes == null) {
            return answers;
        }
        for (RDFNode node : nodes) {
            if (node.isResource()) {
                answers.add(node.asResource().getURI());
            } else {
                answers.add(node.asLiteral().getString());
            }
        }
        return answers;
    }

    /** Appends the question to the list stored under {@code key}, creating the list on first use. */
    private void addSave(Map<String, List<Qald7Question>> map, String key, Qald7Question question) {
        List<Qald7Question> bucket = map.get(key);
        if (bucket == null) {
            bucket = new ArrayList<>();
            map.put(key, bucket);
        }
        bucket.add(question);
    }

    /**
     * Groups the questions by their English text and keeps, per group, the first question with the
     * fewest {@link Fail} flags. Questions without English text are all kept and never counted as
     * duplicates. Resets and updates {@link #duplicate}.
     */
    private Set<Qald7Question> findAndSelectBestDuplicate(List<Qald7Question> questions) {
        this.duplicate = 0;
        Map<String, List<Qald7Question>> byEnglishText = new HashMap<>();
        for (Qald7Question question : questions) {
            addSave(byEnglishText, question.getLanguageToQuestion().get("en"), question);
        }

        Set<Qald7Question> selected = new HashSet<>();
        List<Qald7Question> withoutEnglishText = byEnglishText.remove(null);
        if (withoutEnglishText != null) {
            selected.addAll(withoutEnglishText);
        }

        for (List<Qald7Question> group : byEnglishText.values()) {
            this.duplicate += group.size() - 1;
            // addSave never stores an empty list, so "best" is always assigned below.
            Qald7Question best = null;
            for (Qald7Question candidate : group) {
                // Strict "<" keeps the first of several equally good candidates.
                if (best == null || candidate.getFails().size() < best.getFails().size()) {
                    best = candidate;
                }
            }
            selected.add(best);
        }
        return selected;
    }

    /** Returns the questions that carry no {@link Fail} flag at all. */
    private Set<Qald7Question> extractGoodTrainQuestionsFromAnnotated(Set<Qald7Question> annotated) {
        Set<Qald7Question> good = new HashSet<>();
        for (Qald7Question question : annotated) {
            if (question.getFails().isEmpty()) {
                good.add(question);
            }
        }
        return good;
    }

    /** Returns the questions that carry at least one {@link Fail} flag not listed in {@code ignoredFails}. */
    private Set<Qald7Question> extractBadQuestionsFromAnnotated(Set<Qald7Question> annotated, Set<Fail> ignoredFails) {
        Set<Qald7Question> bad = new HashSet<>();
        for (Qald7Question question : annotated) {
            Set<Fail> relevantFails = new HashSet<>(question.getFails());
            relevantFails.removeAll(ignoredFails);
            if (!relevantFails.isEmpty()) {
                bad.add(question);
            }
        }
        return bad;
    }

    /**
     * Assigns consecutive ids (starting at 0, in iteration order of the set) to the questions and
     * writes them as {@code QaldJson_<name>.json}, {@code ExtendedJson_<name>.json} and
     * {@code XML_<name>.xml}, each prefixed with {@code pathPrefix}. Failures while writing are
     * printed to standard error and do not abort the caller.
     */
    private void createQald7Dataset(Set<Qald7Question> questions, String pathPrefix, String datasetName) {
        List<IQuestion> numbered = new ArrayList<>();
        int nextId = 0;
        for (Qald7Question question : questions) {
            question.setId(String.valueOf(nextId++));
            numbered.add(question);
        }

        EJDataset ejDataset = new EJDataset();
        ejDataset.setId(datasetName);
        QaldJson qaldJson = EJQuestionFactory.getQaldJson(numbered);
        qaldJson.setDataset(ejDataset);
        ExtendedJson extendedJson = EJQuestionFactory.getExtendedJson(numbered);
        extendedJson.setDataset(ejDataset);

        try {
            ExtendedQALDJSONLoader.writeJson(qaldJson, new File(pathPrefix + "QaldJson_" + datasetName + ".json"), true);
            ExtendedQALDJSONLoader.writeJson(extendedJson, new File(pathPrefix + "ExtendedJson_" + datasetName + ".json"), true);
        } catch (IOException e) {
            e.printStackTrace();
        }

        try {
            StoreQALDXML xmlStore = new StoreQALDXML(datasetName);
            for (IQuestion question : numbered) {
                xmlStore.write(question);
            }
            xmlStore.close(pathPrefix + "XML_" + datasetName + ".xml", datasetName);
        } catch (IOException | ParserConfigurationException | TransformerException
                | TransformerFactoryConfigurationError e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads and annotates the given datasets and writes a report about the flagged questions.
     *
     * @param datasets datasets to load
     * @param autocorrectOnlydbo whether a wrong onlydbo flag is silently corrected instead of flagged
     * @param reportPath path of the report file to write
     * @param ignoredFails flags that do not make a question show up in the report
     */
    public void createFileReportForTestQuestions(Set<Dataset> datasets, boolean autocorrectOnlydbo,
            String reportPath, Set<Fail> ignoredFails) {
        createFileReport(loadAndAnnotateTrain(datasets, autocorrectOnlydbo), reportPath, ignoredFails);
    }

    /**
     * Writes a plain-text report listing every question that carries at least one flag not
     * contained in {@code ignoredFails}, followed by a short summary. An {@link IOException}
     * while writing is printed to standard error.
     *
     * @param annotated annotated questions, e.g. the result of {@link #loadAndAnnotateTrain}
     * @param reportPath path of the report file to write
     * @param ignoredFails flags that are neither reported nor make a question count as faulty
     */
    public void createFileReport(Set<Qald7Question> annotated, String reportPath, Set<Fail> ignoredFails) {
        String newline = System.getProperty("line.separator");
        Set<Qald7Question> badQuestions = extractBadQuestionsFromAnnotated(annotated, ignoredFails);
        StringBuilder report = new StringBuilder();
        Set<Dataset> sourceDatasets = new HashSet<>();

        for (Qald7Question question : badQuestions) {
            sourceDatasets.add(question.getFromDataset());

            Set<String> goldenAnswers = question.getGoldenAnswers();
            if (goldenAnswers == null) {
                goldenAnswers = Collections.emptySet();
            }
            Set<String> serverAnswers = question.getServerAnswers();
            if (serverAnswers == null) {
                serverAnswers = Collections.emptySet();
            }
            Set<String> onlyInDataset = new HashSet<>(goldenAnswers);
            onlyInDataset.removeAll(serverAnswers);
            Set<String> onlyOnServer = new HashSet<>(serverAnswers);
            onlyOnServer.removeAll(goldenAnswers);

            Set<Fail> reportedFails = new HashSet<>(question.getFails());
            reportedFails.removeAll(ignoredFails);

            report.append(REPORT_RULE).append(newline);
            report.append("| Question Dataset: ").append(question.getFromDataset().name())
                    .append(" Id: ").append(question.getId()).append(newline);
            report.append("| Flags: ").append(reportedFails).append(newline);
            report.append("| Question: ").append(question.getLanguageToQuestion().get("en")).append(newline);
            if (!onlyInDataset.isEmpty() || !onlyOnServer.isEmpty()) {
                // A question without SPARQL query can still differ in its answers; print an empty
                // query line for it instead of failing the whole report.
                String query = question.getSparqlQuery();
                report.append("| Sparql Query:").append(newline);
                report.append("| ").append(query == null ? "" : query.replaceAll("\\s", " ")).append(newline);
                report.append("| Answers in dataset and not in Server response").append(newline);
                report.append(Chars.S_VBAR).append(onlyInDataset).append(newline).append(newline);
                report.append("| Answers in Server response and not in Dataset").append(newline);
                report.append(Chars.S_VBAR).append(onlyOnServer).append(newline);
            }
            report.append(REPORT_RULE).append(newline);
        }
        report.append("From Datasets :").append(sourceDatasets).append(newline);
        report.append("Unique Questions total in all Datasets: ").append(annotated.size())
                .append(" Faulty: ").append(badQuestions.size())
                .append(" beforehand removed duplicates: ").append(this.duplicate);

        // try-with-resources closes (and thereby flushes) the writer even if write() fails.
        try (FileWriter writer = new FileWriter(new File(reportPath))) {
            writer.write(report.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns whether every prefix declared by the query is either named like the RDF / RDFS
     * prefix or bound to the DBpedia ontology or resource namespace.
     *
     * @param sparqlQuery query to inspect; {@code null} yields {@code false}
     * @throws QueryParseException if the query cannot be parsed
     */
    private boolean checkIsOnlydbo(String sparqlQuery) throws QueryParseException {
        if (sparqlQuery == null) {
            return false;
        }
        Map<String, String> prefixes = QueryFactory.create(sparqlQuery).getPrefixMapping().getNsPrefixMap();
        for (Map.Entry<String, String> prefix : prefixes.entrySet()) {
            boolean allowed = prefix.getKey().equals(RDF.PREFIX)
                    || prefix.getKey().equals(RDFS.PREFIX)
                    || prefix.getValue().equals(DBO_URI)
                    || prefix.getValue().equals(RES_URI);
            if (!allowed) {
                return false;
            }
        }
        return true;
    }

    /** Releases the underlying {@link ThreadedSPARQL} helper. */
    public void destroy() {
        this.sparql.destroy();
    }

    /**
     * Creates the QALD-7 hybrid and multilingual train datasets (including the report about bad
     * questions) in the current working directory.
     */
    public static void main(String[] args) {
        Qald7CreationTool tool = new Qald7CreationTool();
        try {
            tool.createQald7HybridDataset(HYBRID_SETS, "", "qald-7-train-hybrid");
            tool.createQald7MultilingualTrainDataset(MULTILINGUAL_TRAIN_TEST_SETS, true, true, "", "qald-7-train-multilingual");
        } finally {
            // Release the SPARQL helper even when dataset creation fails.
            tool.destroy();
        }
        System.out.println("duplcates " + tool.duplicate);
        System.out.println("Done");
    }
}
