package org.aksw.gerbil.tools;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashSet;
import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.semantic.sameas.index.Indexer;
import org.apache.commons.lang.time.DurationFormatUtils;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.RDFNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/gerbil/tools/InitialIndexTool.class */
/**
 * Command line tool that feeds {@code owl:sameAs} statements into an
 * {@link Indexer} writing to {@value #OUTPUT_FOLDER}.
 *
 * <p>
 * Statements are grouped by subject: consecutive statements sharing the same
 * subject are collected into one set of objects which is handed to
 * {@link Indexer#index} together with the subject. The statements can either
 * be read from {@code .nt} files ({@link #indexFolder(Indexer, String)},
 * {@link #index(Indexer, String)}) or fetched page by page from a SPARQL
 * endpoint ({@link #index(Indexer)}).
 * </p>
 */
public class InitialIndexTool {
    private static final String OUTPUT_FOLDER = "indexes/dbpedia";
    private static final String SPARQL_GET = "select distinct ?s ?o where {?s <http://www.w3.org/2002/07/owl#sameAs> ?o}";
    private static final Logger LOGGER = LoggerFactory.getLogger(InitialIndexTool.class);
    private static final String SERVICE = "http://de.dbpedia.org/sparql";
    private static final String OWL_SAME_AS = "<http://www.w3.org/2002/07/owl#sameAs>";
    /** Number of solutions requested from the SPARQL endpoint per query. */
    private static final int PAGE_SIZE = 10000;
    /** Number of processed file triples between two progress log messages. */
    private static final long LOG_INTERVAL = 100000;

    /**
     * Indexes all {@code .nt} files of the folder given as first argument.
     *
     * @param strArr
     *            command line arguments; the first element is the path of the
     *            folder containing the {@code .nt} files
     * @throws IllegalArgumentException
     *             if no folder has been given
     * @throws GerbilException
     *             if the indexer reports an error
     * @throws IOException
     *             if the folder or one of its files cannot be read
     */
    public static void main(String[] strArr) throws GerbilException, IOException {
        if (strArr == null || strArr.length < 1) {
            throw new IllegalArgumentException(
                    "Missing argument. Usage: InitialIndexTool <folder containing .nt files>");
        }
        Indexer indexer = new Indexer(OUTPUT_FOLDER);
        SimpleDateFormat dateFormat = new SimpleDateFormat();
        Date start = new Date();
        LOGGER.info("Start indexing at {}", dateFormat.format(start));
        try {
            indexFolder(indexer, strArr[0]);
        } finally {
            // Close the indexer even if indexing failed half way through.
            indexer.close();
        }
        Date end = new Date();
        LOGGER.info("Indexing finished at {}", dateFormat.format(end));
        LOGGER.info("Indexing took: {}", DurationFormatUtils.formatDurationHMS(end.getTime() - start.getTime()));
    }

    /**
     * Retrieves all {@code owl:sameAs} statements from the SPARQL endpoint
     * {@code SERVICE} in pages of {@code PAGE_SIZE} solutions and indexes them
     * grouped by subject. Paging stops after the first page that contains
     * fewer than {@code PAGE_SIZE} solutions.
     *
     * <p>
     * NOTE(review): a block is flushed whenever the subject changes, so the
     * grouping relies on all solutions of one subject arriving consecutively.
     * The query has no ORDER BY clause - confirm that the endpoint delivers
     * the solutions in a suitable and stable order.
     * </p>
     *
     * @param indexer
     *            the indexer receiving the subject and its set of objects
     * @throws GerbilException
     *             if the indexer reports an error
     */
    public static void index(Indexer indexer) throws GerbilException {
        Query query = QueryFactory.create(SPARQL_GET);
        query.setLimit(PAGE_SIZE);
        HashSet<String> sameAsBlock = new HashSet<String>();
        RDFNode currentSubject = null;
        int offset = 0;
        int rounds = 0;
        long total = 0;
        boolean morePages = true;
        long overallStart = System.currentTimeMillis();
        do {
            query.setOffset(offset);
            int pageSize = 0;
            long indexTime = 0;
            long queryStart = System.currentTimeMillis();
            long queryEnd;
            QueryExecution execution = QueryExecutionFactory.sparqlService(SERVICE, query);
            try {
                ResultSet results = execution.execSelect();
                queryEnd = System.currentTimeMillis();
                rounds++;
                while (results.hasNext()) {
                    pageSize++;
                    QuerySolution solution = results.next();
                    RDFNode subject = solution.get("s");
                    RDFNode object = solution.get("o");
                    if (!subject.equals(currentSubject)) {
                        // The subject changed: flush the block of the previous one.
                        if (currentSubject != null) {
                            long indexStart = System.currentTimeMillis();
                            indexer.index(currentSubject.toString(), sameAsBlock);
                            indexTime += System.currentTimeMillis() - indexStart;
                            total += sameAsBlock.size();
                            sameAsBlock.clear();
                        }
                        currentSubject = subject;
                    }
                    sameAsBlock.add(object.toString());
                }
            } finally {
                // Release the connection of this page before requesting the next one.
                execution.close();
            }
            if (pageSize < PAGE_SIZE) {
                morePages = false;
            }
            offset += PAGE_SIZE;
            LOGGER.info("Got {} triples...(Sum: {}, AvgTime: {}, QueryTime: {}, IndexTime: {})",
                    new Object[] { Integer.valueOf(pageSize),
                            Long.valueOf(((long) PAGE_SIZE * (rounds - 1)) + pageSize),
                            DurationFormatUtils.formatDurationHMS((System.currentTimeMillis() - overallStart) / rounds),
                            DurationFormatUtils.formatDurationHMS(queryEnd - queryStart),
                            DurationFormatUtils.formatDurationHMS(indexTime) });
        } while (morePages);
        // Flush the block of the last subject and include it in the statistics.
        if (!sameAsBlock.isEmpty()) {
            indexer.index(currentSubject.toString(), sameAsBlock);
            total += sameAsBlock.size();
            sameAsBlock.clear();
        }
        LOGGER.info("Successfully indexed {} triples", Long.valueOf(total));
    }

    /**
     * Indexes every file of the given folder whose name ends with
     * {@code .nt}. Sub folders are not traversed.
     *
     * @param indexer
     *            the indexer receiving the statements
     * @param str
     *            path of the folder containing the {@code .nt} files
     * @throws GerbilException
     *             if the indexer reports an error
     * @throws IOException
     *             if the path is not a listable directory or a file cannot be
     *             read
     */
    public static void indexFolder(Indexer indexer, String str) throws GerbilException, IOException {
        File[] files = new File(str).listFiles();
        if (files == null) {
            // listFiles() returns null instead of throwing an exception.
            throw new IOException("Cannot list the files of \"" + str + "\". It is not a directory or not readable.");
        }
        for (File file : files) {
            if (file.getName().endsWith(".nt")) {
                index(indexer, file.getAbsolutePath());
            }
        }
    }

    /**
     * Reads the given file line by line, treats every line as a whitespace
     * separated triple and indexes those with the {@code owl:sameAs}
     * predicate, grouped by consecutive subject. Lines with fewer than three
     * tokens (e.g., blank lines), other predicates or an object that is not
     * enclosed in angle brackets are skipped.
     *
     * <p>
     * A block is flushed whenever the subject changes, so all statements of
     * one subject have to be on consecutive lines to end up in a single
     * {@link Indexer#index} call.
     * </p>
     *
     * @param indexer
     *            the indexer receiving the subject and its set of objects
     * @param str
     *            path of the file that should be read (decoded as UTF-8)
     * @throws GerbilException
     *             if the indexer reports an error
     * @throws IOException
     *             if the file cannot be read
     */
    public static void index(Indexer indexer, String str) throws GerbilException, IOException {
        HashSet<String> sameAsBlock = new HashSet<String>();
        long total = 0;
        long sameAsTriples = 0;
        long rounds = 0;
        String currentSubject = null;
        long start = System.currentTimeMillis();
        // try-with-resources makes sure the file is closed if indexing fails.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(str), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.trim().split("\\s+");
                if (parts.length < 3 || !OWL_SAME_AS.equals(parts[1])) {
                    continue;
                }
                String rawObject = parts[2];
                int open = rawObject.indexOf('<');
                int close = rawObject.lastIndexOf('>');
                if (close <= open) {
                    // No closing bracket after the opening one; nothing to extract.
                    LOGGER.warn("Skipping triple with an unexpected object: {}", line);
                    continue;
                }
                String subject = parts[0].replace("<", "").replace(">", "");
                // Cutting at the last '>' also drops a '.' glued to the object.
                String object = rawObject.substring(open + 1, close).trim();
                if (!subject.equals(currentSubject)) {
                    // The subject changed: flush the block of the previous one.
                    if (currentSubject != null) {
                        indexer.index(currentSubject, sameAsBlock);
                        total += sameAsBlock.size();
                        sameAsBlock.clear();
                    }
                    currentSubject = subject;
                }
                sameAsBlock.add(object);
                sameAsTriples++;
                if (sameAsTriples % LOG_INTERVAL == 0) {
                    rounds++;
                    LOGGER.info("Got 100000 triples...(Sum: {}, AvgTime: {})", Long.valueOf(sameAsTriples),
                            DurationFormatUtils.formatDurationHMS((System.currentTimeMillis() - start) / rounds));
                }
            }
            // Flush the block of the last subject and include it in the statistics.
            if (!sameAsBlock.isEmpty()) {
                indexer.index(currentSubject, sameAsBlock);
                total += sameAsBlock.size();
                sameAsBlock.clear();
            }
        }
        LOGGER.info("Successfully indexed {} triples", Long.valueOf(total));
    }
}
