package org.aksw.autosparql.tbsl.algorithm.templator;

import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.vocabulary.RDF;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.aksw.autosparql.commons.qald.QaldLoader;
import org.aksw.autosparql.commons.qald.Question;
import org.aksw.autosparql.tbsl.algorithm.sparql.GoldTemplate;
import org.aksw.autosparql.tbsl.algorithm.sparql.Slot;
import org.aksw.autosparql.tbsl.algorithm.sparql.SlotType;
import org.aksw.autosparql.tbsl.algorithm.util.TriplePatternExtractor;
import org.dllearner.kb.SparqlEndpointKS;
import org.dllearner.kb.sparql.SparqlEndpoint;
import org.dllearner.reasoning.SPARQLReasoner;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/aksw/autosparql/tbsl/algorithm/templator/GoldTemplateGenerator.class */
/**
 * Builds a {@link GoldTemplate} for every in-scope QALD question and writes the templates
 * into a copy of the training XML file.
 *
 * <p>For each question the prefixed names in its SPARQL query (except {@code rdf:type} and
 * names starting with {@code xsd}) are replaced by fresh variables {@code ?var0, ?var1, ...}.
 * The triple patterns of the resulting query are then inspected to attach a typed {@link Slot}
 * to each of those variables.
 */
public class GoldTemplateGenerator {
    /** Matches prefixed names such as {@code dbo:birthPlace} inside a SPARQL query string. */
    private static final Pattern PREFIXED_NAME = Pattern.compile("\\b\\w+:(\\w|\\.)+\\b");

    /** Classpath location of the QALD training file that gets annotated. */
    private static final String TRAINING_RESOURCE = "/qald/dbpedia-train.xml";

    /**
     * Loads the questions, derives one template per in-scope question and serializes the result.
     * All work happens here; constructing an instance runs the whole generation.
     */
    public GoldTemplateGenerator() {
        SPARQLReasoner reasoner = new SPARQLReasoner(new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpedia()));
        List<Question> questions = QaldLoader.loadAndSerializeQuestions(Arrays.asList("en"), "de_wac_175m_600.crf.ser.gz", "english.all.3class.distsim.crf.ser.gz", "german-dewac.tagger", "english-left3words-distsim.tagger", false);
        TriplePatternExtractor extractor = new TriplePatternExtractor();
        Map<Integer, GoldTemplate> templatesById = new HashMap<Integer, GoldTemplate>();
        for (Question question : questions) {
            if (question.outOfScope.booleanValue()) {
                continue;
            }
            templatesById.put(question.id, buildTemplate(question.sparqlQuery, extractor, reasoner));
        }
        serialize(templatesById);
    }

    /**
     * Creates the template for a single SPARQL query.
     *
     * @param sparqlQuery the original query text of the question
     * @param extractor   used to obtain the triple patterns of the abstracted query
     * @param reasoner    used to tell datatype properties from object properties
     * @return the template holding the abstracted query and its slots
     */
    private static GoldTemplate buildTemplate(String sparqlQuery, TriplePatternExtractor extractor, SPARQLReasoner reasoner) {
        Map<String, String> prefixedNameByVar = new HashMap<String, String>();
        String abstractedQuery = abstractPrefixedNames(sparqlQuery, prefixedNameByVar);
        Query query = QueryFactory.create(abstractedQuery, Syntax.syntaxARQ);
        GoldTemplate template = new GoldTemplate(query);
        // Variables that already received a slot, so they are not slotted twice.
        Set<String> slottedVars = new HashSet<String>();
        for (Triple triple : extractor.extractTriplePattern(query)) {
            if (triple.getPredicate().equals(RDF.type.asNode())) {
                addTypeTripleSlots(triple, template, prefixedNameByVar, slottedVars);
            } else if (triple.getPredicate().isVariable()) {
                addPropertyTripleSlots(triple, query, template, prefixedNameByVar, slottedVars, reasoner);
            }
        }
        return template;
    }

    /**
     * Replaces every distinct prefixed name in the query by a fresh variable.
     *
     * @param sparqlQuery       the original query text
     * @param prefixedNameByVar output map, filled with variable name (without {@code ?}) to the
     *                          prefixed name it replaced
     * @return the query text with the prefixed names replaced by {@code ?varN}
     */
    private static String abstractPrefixedNames(String sparqlQuery, Map<String, String> prefixedNameByVar) {
        String abstracted = sparqlQuery;
        Set<String> seen = new HashSet<String>();
        // The matcher scans the ORIGINAL text; replacements are applied to the copy.
        Matcher matcher = PREFIXED_NAME.matcher(sparqlQuery);
        int counter = 0;
        while (matcher.find()) {
            String prefixedName = matcher.group();
            if (prefixedName.equals("rdf:type") || prefixedName.startsWith("xsd") || !seen.add(prefixedName)) {
                continue;
            }
            String variable = "var" + counter;
            counter++;
            // Quote the name: it may contain '.', which would otherwise match any character.
            abstracted = abstracted.replaceAll("\\b" + Pattern.quote(prefixedName) + "\\b", "?" + variable);
            prefixedNameByVar.put(variable, prefixedName);
        }
        return abstracted;
    }

    /**
     * Handles a triple whose predicate is {@code rdf:type}: the subject becomes a RESOURCE slot
     * (once per variable) and the object becomes a CLASS slot.
     */
    private static void addTypeTripleSlots(Triple triple, GoldTemplate template, Map<String, String> prefixedNameByVar, Set<String> slottedVars) {
        // Only variable nodes have a name; IRIs and literals are left alone.
        if (triple.getSubject().isVariable()) {
            String subjectVar = triple.getSubject().getName();
            String subjectName = prefixedNameByVar.get(subjectVar);
            if (subjectName != null && !slottedVars.contains(subjectVar)) {
                template.addSlot(newSlot(subjectVar, SlotType.RESOURCE, subjectName));
                slottedVars.add(subjectVar);
            }
        }
        if (triple.getObject().isVariable()) {
            String classVar = triple.getObject().getName();
            String className = prefixedNameByVar.get(classVar);
            if (className != null) {
                template.addSlot(newSlot(classVar, SlotType.CLASS, className));
                prefixedNameByVar.remove(classVar);
            }
        }
    }

    /**
     * Handles a triple whose predicate is a variable: the subject becomes a RESOURCE slot, the
     * predicate a DATATYPEPROPERTY or OBJECTPROPERTY slot depending on the reasoner, and for
     * object properties the object becomes a RESOURCE slot as well.
     */
    private static void addPropertyTripleSlots(Triple triple, Query query, GoldTemplate template, Map<String, String> prefixedNameByVar, Set<String> slottedVars, SPARQLReasoner reasoner) {
        if (triple.getSubject().isVariable()) {
            String subjectVar = triple.getSubject().getName();
            String subjectName = prefixedNameByVar.get(subjectVar);
            if (subjectName != null) {
                template.addSlot(newSlot(subjectVar, SlotType.RESOURCE, subjectName));
                prefixedNameByVar.remove(subjectVar);
            }
        }

        String propertyVar = triple.getPredicate().getName();
        String propertyName = prefixedNameByVar.get(propertyVar);
        if (propertyName == null) {
            // The predicate variable was part of the original query, not one introduced by the
            // abstraction step, so there is no property to classify.
            return;
        }
        String propertyUri = query.expandPrefixedName(propertyName);
        Slot propertySlot = null;
        if (reasoner.isDataProperty(propertyUri, true)) {
            propertySlot = newSlot(propertyVar, SlotType.DATATYPEPROPERTY, propertyName);
        } else if (reasoner.isObjectProperty(propertyUri, true)) {
            propertySlot = newSlot(propertyVar, SlotType.OBJECTPROPERTY, propertyName);
            if (triple.getObject().isVariable()) {
                String objectVar = triple.getObject().getName();
                String objectName = prefixedNameByVar.get(objectVar);
                if (objectName != null) {
                    template.addSlot(newSlot(objectVar, SlotType.RESOURCE, objectName));
                }
            }
        } else {
            // Neither kind of property according to the reasoner: report it and add no slot.
            System.err.println(propertyUri);
        }
        if (propertySlot != null && !slottedVars.contains(propertyVar)) {
            template.addSlot(propertySlot);
            slottedVars.add(propertyVar);
        }
    }

    /** Creates a slot anchored at the given variable whose single word is the local name. */
    private static Slot newSlot(String anchorVar, SlotType type, String prefixedName) {
        return new Slot(anchorVar, type, Collections.singletonList(localName(prefixedName)));
    }

    /** Returns the part of a prefixed name after the first colon. */
    private static String localName(String prefixedName) {
        return prefixedName.substring(prefixedName.indexOf(':') + 1);
    }

    /**
     * Adds a {@code <template>} element (query plus slots) to each {@code <question>} of the
     * training file that has a template and writes the result to
     * {@code dbpedia-train-with-templates.xml} in the working directory.
     *
     * <p>Parsing and transformation failures are printed to standard error.
     *
     * @param map templates keyed by the question's {@code id} attribute
     * @throws IllegalStateException if the training file is not on the classpath
     */
    private void serialize(Map<Integer, GoldTemplate> map) {
        URL trainingFile = QaldLoader.class.getResource(TRAINING_RESOURCE);
        if (trainingFile == null) {
            throw new IllegalStateException("Resource not found on classpath: " + TRAINING_RESOURCE);
        }
        try {
            // Parse from the URL itself so that encoded paths and jar entries work.
            Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(trainingFile.toExternalForm());
            document.getDocumentElement().normalize();
            NodeList questionNodes = document.getElementsByTagName("question");
            for (int i = 0; i < questionNodes.getLength(); i++) {
                Element questionElement = (Element) questionNodes.item(i);
                GoldTemplate template = map.get(Integer.valueOf(questionElement.getAttribute("id")));
                if (template == null) {
                    continue;
                }
                Element templateElement = document.createElement("template");
                questionElement.appendChild(templateElement);
                appendCData(document, templateElement, "query", template.getQuery().toString());
                Element slotsElement = document.createElement("slots");
                templateElement.appendChild(slotsElement);
                for (Slot slot : template.getSlots()) {
                    Element slotElement = document.createElement("slot");
                    appendCData(document, slotElement, "anchor", slot.getAnchor());
                    appendCData(document, slotElement, "type", slot.getSlotType().name());
                    appendCData(document, slotElement, "token", slot.getWords().get(0));
                    slotsElement.appendChild(slotElement);
                }
            }
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty("indent", "yes");
            transformer.transform(new DOMSource(document), new StreamResult(new File("dbpedia-train-with-templates.xml")));
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            // Also covers TransformerConfigurationException, which is a subclass.
            e.printStackTrace();
        } catch (DOMException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        }
    }

    /** Appends a child element with the given tag whose content is a single CDATA section. */
    private static void appendCData(Document document, Element parent, String tag, String text) {
        Element child = document.createElement(tag);
        child.appendChild(document.createCDATASection(text));
        parent.appendChild(child);
    }

    /** Runs the generation once. */
    public static void main(String[] strArr) throws Exception {
        new GoldTemplateGenerator();
    }
}
