package org.aksw.sparqltools.correlation;

import com.google.common.base.Charsets;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Sets;
import com.google.common.hash.Hashing;
import com.google.common.util.concurrent.MoreExecutors;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.net.URLEncoder;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.aksw.jena_sparql_api.cache.core.QueryExecutionFactoryCacheEx;
import org.aksw.jena_sparql_api.cache.extra.CacheFrontendImpl;
import org.aksw.jena_sparql_api.cache.extra.SqlUtils;
import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp;
import org.aksw.sparqltools.util.H2CacheBackend;
import org.aksw.sparqltools.util.SPARQLEndpoint;
import org.aksw.sparqltools.util.SPARQLEndpointUtils;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
import org.apache.log4j.Logger;
import org.h2.tools.RunScript;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.util.IRIShortFormProvider;
import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;
import weka.classifiers.Evaluation;
import weka.classifiers.functions.LinearRegression;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;

/* loaded from: input_file:org/aksw/sparqltools/correlation/NumericDataPropertyCorrelationFinder.class */
public class NumericDataPropertyCorrelationFinder implements CorrelationFinder<LinearCorrelation> {
    File cacheDir;
    private static final Logger logger = Logger.getLogger(NumericDataPropertyCorrelationFinder.class.getName());
    int sampleSize;
    private QueryExecutionFactory qef;
    final IRIShortFormProvider sfp;
    private Connection conn;
    private String QUERY_LOOKUP;
    private String QUERY_INSERT;
    private int minNrOfinstances;
    volatile int progress;

    /**
     * Creates a finder without a result database by delegating to
     * {@link #NumericDataPropertyCorrelationFinder(Connection)} with a {@code null} connection.
     */
    public NumericDataPropertyCorrelationFinder() {
        this((Connection) null);
    }

    /**
     * Creates a finder that optionally persists its results in a database.
     *
     * <p>The local {@code cache} directory is created if necessary. When a connection is given,
     * the bundled {@code correlation-cache-schema.sql} script is executed against it.
     *
     * @param connection database connection used for looking up and storing results, or
     *                   {@code null} to work without a database
     * @throws RuntimeException if a connection is given but the schema resource cannot be found
     */
    public NumericDataPropertyCorrelationFinder(Connection connection) {
        this.cacheDir = new File("cache");
        this.sampleSize = 1000;
        this.sfp = new SimpleIRIShortFormProvider();
        this.QUERY_LOOKUP = "SELECT * FROM correlation_cache WHERE property1 = ? AND property2 = ? LIMIT 1";
        this.QUERY_INSERT = "INSERT INTO correlation_cache(property1,property2,sampleSize,rValue,pValue,regressionCoefficient,yIntercept) VALUES (?, ?, ?, ?, ?, ?, ?)";
        this.minNrOfinstances = 10;
        this.progress = 1;
        this.conn = connection;
        this.cacheDir.mkdirs();
        if (connection == null) {
            return;
        }
        InputStream schemaStream = SPARQLEndpointUtils.class.getClassLoader().getResourceAsStream("correlation-cache-schema.sql");
        if (schemaStream == null) {
            throw new RuntimeException("Failed to load resource: correlation-cache-schema.sql");
        }
        // Explicit charset instead of the platform default; the reader (and with it the
        // resource stream) is closed even when the script fails.
        try (InputStreamReader schemaReader = new InputStreamReader(schemaStream, Charsets.UTF_8)) {
            RunScript.execute(connection, schemaReader);
        } catch (SQLException e) {
            // Best effort, as before: a failing schema script does not abort construction.
            logger.error("Failed to execute correlation-cache-schema.sql", e);
        } catch (IOException e) {
            logger.error("Failed to close correlation-cache-schema.sql", e);
        }
    }

    public NumericDataPropertyCorrelationFinder(SPARQLEndpoint sPARQLEndpoint) {
        this.cacheDir = new File("cache");
        this.sampleSize = 1000;
        this.sfp = new SimpleIRIShortFormProvider();
        this.QUERY_LOOKUP = "SELECT * FROM correlation_cache WHERE property1 = ? AND property2 = ? LIMIT 1";
        this.QUERY_INSERT = "INSERT INTO correlation_cache(property1,property2,sampleSize,rValue,pValue,regressionCoefficient,yIntercept) VALUES (?, ?, ?, ?, ?, ?, ?)";
        this.minNrOfinstances = 10;
        this.progress = 1;
        this.qef = new QueryExecutionFactoryHttp(sPARQLEndpoint.getURL().toString());
        try {
            this.qef = new QueryExecutionFactoryCacheEx(this.qef, new CacheFrontendImpl(H2CacheBackend.create(URLEncoder.encode(sPARQLEndpoint.getURL().toString(), "UTF-8"))));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Analyzes every unordered pair of numeric properties of the endpoint and collects the
     * pairs for which a linear correlation was found.
     *
     * <p>For each pair the result database is consulted first (if one is configured); only on
     * a miss is the correlation computed and, when a database is in use, stored. Pairs are
     * processed by a fixed pool of 8 worker threads; this method blocks until the pool has
     * terminated or one day has passed. Pairs without data or without a linear correlation
     * are skipped, other failures are logged and skipped.
     *
     * <p>The {@code progress} counter is reset at the start of each call.
     *
     * @param sPARQLEndpoint endpoint to analyze; this call also (re)binds the query factory to it
     * @return the correlations found, as a synchronized sorted set
     */
    @Override // org.aksw.sparqltools.correlation.CorrelationFinder
    public Set<LinearCorrelation> getCorrelatedProperties(SPARQLEndpoint sPARQLEndpoint) {
        final SortedSet<LinearCorrelation> correlations =
                Collections.synchronizedSortedSet(new TreeSet<LinearCorrelation>());

        String endpointUrl = sPARQLEndpoint.getURL().toString();
        this.qef = new QueryExecutionFactoryHttp(endpointUrl);
        try {
            this.qef = new QueryExecutionFactoryCacheEx(this.qef, new CacheFrontendImpl(H2CacheBackend.create(URLEncoder.encode(endpointUrl, "UTF-8"))));
        } catch (Exception e) {
            logger.error("Could not set up the query cache for " + endpointUrl + "; using uncached HTTP access", e);
        }

        Set<String> numericProperties = getNumericProperties(sPARQLEndpoint);

        // Each pair is a two-element sorted set; ordering by (first, last) makes (a, b) and
        // (b, a) collapse into a single entry.
        TreeSet<SortedSet<String>> pairs = new TreeSet<SortedSet<String>>(new Comparator<SortedSet<String>>() {
            @Override // java.util.Comparator
            public int compare(SortedSet<String> left, SortedSet<String> right) {
                return ComparisonChain.start()
                        .compare(left.first(), right.first())
                        .compare(left.last(), right.last())
                        .result();
            }
        });
        for (String first : numericProperties) {
            for (String second : numericProperties) {
                if (!first.equals(second)) {
                    SortedSet<String> pair = new TreeSet<String>();
                    pair.add(first);
                    pair.add(second);
                    pairs.add(pair);
                }
            }
        }

        final int total = pairs.size();
        // Report roughly every 5%; at least 1 so that small inputs do not divide by zero.
        final int step = Math.max(1, (int) (0.05d * total));
        final Object progressLock = new Object();
        this.progress = 1;

        ExecutorService pool = Executors.newFixedThreadPool(8);
        for (SortedSet<String> pair : pairs) {
            final String property1 = pair.first();
            final String property2 = pair.last();
            pool.submit(new Runnable() {
                @Override // java.lang.Runnable
                public void run() {
                    try {
                        LinearCorrelation correlation = lookup(property1, property2);
                        if (correlation == null) {
                            correlation = computeLinearCorrelation(property1, property2);
                            if (useDatabase()) {
                                write(correlation);
                            }
                        }
                        correlations.add(correlation);
                    } catch (NoInstanceDataException e) {
                        logger.trace(e.getMessage());
                    } catch (NoLinearCorrelationException e) {
                        logger.trace(e.getMessage());
                    } catch (OutOfRangeException e) {
                        // Must precede the generic handler: it is an unchecked exception and
                        // would otherwise be unreachable.
                        logger.error(e);
                    } catch (Exception e) {
                        // Covers SQLException as well; both were reported identically.
                        logger.error("Error when analyzing " + property1 + " and " + property2, e);
                    } finally {
                        // volatile alone does not make ++ atomic across the workers.
                        int finished;
                        synchronized (progressLock) {
                            finished = NumericDataPropertyCorrelationFinder.this.progress;
                            NumericDataPropertyCorrelationFinder.this.progress = finished + 1;
                        }
                        if (finished % step == 0) {
                            logger.debug("Progress:" + ((100L * finished) / total) + "%");
                        }
                    }
                }
            });
        }
        MoreExecutors.shutdownAndAwaitTermination(pool, 1L, TimeUnit.DAYS);
        return correlations;
    }

    /**
     * Computes the linear correlation between two properties; equivalent to
     * {@link #computeLinearCorrelation(String, String, boolean)} with the flag set to
     * {@code false}.
     *
     * @param str  URI of the first property
     * @param str2 URI of the second property
     * @return the correlation found for the pair
     * @throws NoLinearCorrelationException propagated from the three-argument overload
     * @throws SQLException propagated from the three-argument overload
     * @throws NoInstanceDataException propagated from the three-argument overload
     */
    public LinearCorrelation computeLinearCorrelation(String str, String str2) throws NoLinearCorrelationException, SQLException, NoInstanceDataException {
        return computeLinearCorrelation(str, str2, false);
    }

    /**
     * Computes the linear correlation between two properties and fits a linear regression
     * model on their common data.
     *
     * <p>The Pearson correlation is checked first (see {@code computeCorrelation}); only
     * if it passes is a Weka {@link LinearRegression} built and cross-validated with 10 folds
     * and a fixed seed. The cross-validation summary is written to the debug log.
     *
     * @param str  URI of the first property
     * @param str2 URI of the second property
     * @param z    if {@code true}, the common data is additionally dumped to
     *             {@code ./data/test.arff}
     * @return the correlation, including the regression coefficient and y-intercept
     * @throws NoLinearCorrelationException if the correlation check fails or the model cannot
     *                                      be built or evaluated
     * @throws SQLException if recording a negative result in the database fails
     * @throws NoInstanceDataException if there is not enough common data for the pair
     */
    public LinearCorrelation computeLinearCorrelation(String str, String str2, boolean z) throws NoLinearCorrelationException, SQLException, NoInstanceDataException {
        logger.trace("Checking for correlation between " + str + " and " + str2 + "...");
        Instances commonData = getCommonData(str, str2);
        PearsonsCorrelation pearson = computeCorrelation(commonData);
        double rValue = pearson.getCorrelationMatrix().getEntry(0, 1);
        double pValue = pearson.getCorrelationPValues().getEntry(0, 1);
        logger.debug(LinearCorrelation.getInterpretation(str, str2, rValue, pValue));

        // The last attribute (the second property) is the regression target.
        commonData.setClassIndex(commonData.numAttributes() - 1);
        LinearRegression regression = new LinearRegression();
        try {
            regression.buildClassifier(commonData);
            logger.debug(regression);
            if (z) {
                writeArffFile(commonData, new File("./data/test.arff"));
            }
            Evaluation evaluation = new Evaluation(commonData);
            evaluation.crossValidateModel(regression, commonData, 10, new Random(1L), new Object[0]);
            // Goes to the logger rather than stdout: this runs on worker threads inside a library.
            logger.debug(evaluation.toSummaryString(true));
            // Index 0 is used as the slope and index 2 as the intercept; presumably Weka lays
            // the array out as [attribute 0, class attribute, intercept] — confirm against the
            // Weka version in use.
            double[] coefficients = regression.coefficients();
            return new LinearCorrelation(str, str2, commonData.numInstances(), rValue, pValue, coefficients[0], coefficients[2]);
        } catch (Exception e) {
            logger.error("Error when computing model for " + str + " and " + str2, e);
            throw new NoLinearCorrelationException(str, str2, rValue, pValue);
        }
    }

    /**
     * Sets the sample size used when collecting the common data of a property pair
     * (1000 unless changed).
     *
     * @param i the new sample size
     */
    public void setSampleSize(int i) {
        this.sampleSize = i;
    }

    /**
     * Computes the Pearson correlation between the first two attributes of the given data
     * and accepts it only if it is significant and strongly positive.
     *
     * <p>A pair is accepted when the p-value is below 0.05 and the coefficient is at least 0.6.
     * Every rejected pair is recorded through {@code write(...)} before the exception is
     * raised: without statistics when the coefficient is NaN, with coefficient and p-value
     * otherwise.
     *
     * @param instances data set whose attributes 0 and 1 hold the paired values
     * @return the correlation object for an accepted pair
     * @throws NoLinearCorrelationException if the coefficient is NaN or the thresholds are missed
     * @throws SQLException if recording the rejected pair fails
     */
    private PearsonsCorrelation computeCorrelation(Instances instances) throws NoLinearCorrelationException, SQLException {
        final int rowCount = instances.numInstances();
        double[][] samples = new double[rowCount][2];
        int row = 0;
        while (row < rowCount) {
            Instance current = instances.instance(row);
            samples[row][0] = current.value(0);
            samples[row][1] = current.value(1);
            row++;
        }

        PearsonsCorrelation pearson = new PearsonsCorrelation(samples);
        double rValue = pearson.getCorrelationMatrix().getEntry(0, 1);
        double pValue = pearson.getCorrelationPValues().getEntry(0, 1);

        boolean undefined = Double.isNaN(rValue);
        if (!undefined && pValue < 0.05d && rValue >= 0.6d) {
            return pearson;
        }

        // Rejected: remember the outcome, then signal it to the caller.
        String firstName = instances.attribute(0).name();
        String secondName = instances.attribute(1).name();
        if (undefined) {
            write(firstName, secondName, rowCount);
        } else {
            write(firstName, secondName, rowCount, rValue, pValue);
        }
        throw new NoLinearCorrelationException(firstName, secondName, rValue, pValue);
    }

    /**
     * Fetches paired values of two properties from the endpoint as a Weka data set.
     *
     * <p>Only subjects with exactly one value for each of the two properties are considered,
     * and at most {@code sampleSize} rows are requested. Rows whose values cannot be parsed as
     * doubles are logged and skipped.
     *
     * @param str  URI of the first property (becomes attribute 0)
     * @param str2 URI of the second property (becomes attribute 1)
     * @return the collected instances, more than {@code minNrOfinstances} of them
     * @throws NoInstanceDataException if too few usable rows were found; the pair is recorded
     *                                 via {@code write(str, str2)} first
     * @throws SQLException if recording the pair fails
     */
    private Instances getCommonData(String str, String str2) throws NoInstanceDataException, SQLException {
        // The limit follows the configured sample size (it used to be fixed at 1000, which
        // made setSampleSize ineffective for the query). Appended by concatenation so the
        // number is not subject to locale-specific formatting.
        String query = String.format("SELECT ?s (min(?o1) as ?o1) (min(?o2) as ?o2) WHERE {?s <%s> ?o1.?s <%s> ?o2. } GROUP BY ?s HAVING(count(?o1)=1 && count(?o2)=1) LIMIT " + this.sampleSize, str, str2);

        FastVector attributes = new FastVector(2);
        attributes.addElement(new Attribute(str));
        attributes.addElement(new Attribute(str2));
        Instances instances = new Instances("rel", attributes, this.sampleSize);

        QueryExecution execution = this.qef.createQueryExecution(query);
        try {
            ResultSet rows = execution.execSelect();
            while (rows.hasNext()) {
                QuerySolution row = rows.next();
                try {
                    double first = Double.parseDouble(row.getLiteral("o1").getLexicalForm());
                    double second = Double.parseDouble(row.getLiteral("o2").getLexicalForm());
                    Instance instance = new Instance(1.0d, new double[]{first, second});
                    instance.setDataset(instances);
                    instances.add(instance);
                } catch (Exception e) {
                    // A single unusable row must not abort the whole sample.
                    logger.error(e, e);
                }
            }
        } finally {
            // Release the query execution even if fetching the results fails.
            execution.close();
        }

        if (instances.numInstances() > this.minNrOfinstances) {
            return instances;
        }
        write(str, str2);
        throw new NoInstanceDataException(str, str2);
    }

    /**
     * Checks whether a property looks numeric by sampling up to 1000 of its values.
     *
     * @param str URI of the property to test
     * @return {@code true} if at least one value was found and every sampled value is a
     *         literal whose lexical form parses as a double; {@code false} otherwise
     */
    private boolean isNumeric(String str) {
        QueryExecution execution = this.qef.createQueryExecution(String.format("SELECT ?o WHERE {?s <%s> ?o.} LIMIT 1000", str));
        try {
            ResultSet values = execution.execSelect();
            if (!values.hasNext()) {
                logger.debug(str + "\t:UNKNOWN");
                return false;
            }
            while (values.hasNext()) {
                try {
                    Double.parseDouble(values.next().getLiteral("o").getLexicalForm());
                } catch (NumberFormatException | ClassCastException e) {
                    // ClassCastException: the value is not a literal at all (getLiteral casts);
                    // such a property is not numeric either.
                    logger.debug(str + "\t:FALSE");
                    return false;
                }
            }
            logger.debug(str + "\t:TRUE");
            return true;
        } finally {
            // Covers every exit, including the early "no values" return that used to leak.
            execution.close();
        }
    }

    /**
     * Returns the URIs of the endpoint's datatype properties that look numeric.
     *
     * <p>The result is cached as a serialized set in the cache directory, keyed by an MD5 hash
     * of the endpoint URL and default graph IRI. If the cache file is missing or cannot be
     * read, the properties are queried from the endpoint again and the cache is rewritten.
     *
     * @param sPARQLEndpoint endpoint whose URL and default graph identify the cache entry
     * @return the numeric property URIs, never {@code null}
     */
    private Set<String> getNumericProperties(SPARQLEndpoint sPARQLEndpoint) {
        logger.info("Getting numeric property candidates...");
        // String.valueOf tolerates a null default graph IRI; for non-null values the hash is
        // the same as with toString().
        String cacheKey = Hashing.md5().newHasher()
                .putString(sPARQLEndpoint.getURL().toString(), Charsets.UTF_8)
                .putString(String.valueOf(sPARQLEndpoint.getDefaultGraphIRI()), Charsets.UTF_8)
                .hash().toString();
        File cacheFile = new File(this.cacheDir, cacheKey + ".obj");

        Set<String> properties = null;
        if (cacheFile.exists()) {
            properties = readCachedProperties(cacheFile);
        }
        if (properties == null) {
            // No cache yet, or an unreadable one: fall back to the endpoint instead of
            // returning null.
            properties = queryNumericProperties();
            writeCachedProperties(properties, cacheFile);
        }
        logger.debug("done. Got " + properties.size() + " candidates.");
        return properties;
    }

    /** Reads the serialized property set from the cache file; returns null if that fails. */
    private Set<String> readCachedProperties(File file) {
        // NOTE(review): Java-native deserialization; only safe as long as the cache directory
        // is not writable by untrusted parties.
        try (FileInputStream fileIn = new FileInputStream(file);
             ObjectInputStream objectIn = new ObjectInputStream(fileIn)) {
            @SuppressWarnings("unchecked") // the file is only ever written by writeCachedProperties
            Set<String> cached = (Set<String>) objectIn.readObject();
            return cached;
        } catch (Exception e) {
            logger.error("Error reading serialized properties.", e);
            return null;
        }
    }

    /** Queries all non-string datatype properties and keeps those that pass isNumeric. */
    private Set<String> queryNumericProperties() {
        Set<String> candidates = new TreeSet<String>();
        // NOTE(review): the query relies on the endpoint predefining the owl:, rdfs: and xsd:
        // prefixes — confirm for endpoints other than the one this was written against.
        QueryExecution execution = this.qef.createQueryExecution("SELECT DISTINCT ?p WHERE {?p a owl:DatatypeProperty. FILTER NOT EXISTS{?p rdfs:range xsd:string}}");
        try {
            ResultSet rows = execution.execSelect();
            while (rows.hasNext()) {
                candidates.add(rows.next().getResource("p").getURI());
            }
        } finally {
            execution.close();
        }
        for (Iterator<String> it = candidates.iterator(); it.hasNext();) {
            if (!isNumeric(it.next())) {
                it.remove();
            }
        }
        return candidates;
    }

    /** Serializes the property set to the cache file; failures are logged, not propagated. */
    private void writeCachedProperties(Set<String> properties, File file) {
        try (FileOutputStream fileOut = new FileOutputStream(file);
             ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
            objectOut.writeObject(properties);
        } catch (Exception e) {
            logger.error("Error writing properties.", e);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells whether a database connection was supplied at construction time.
     *
     * @return {@code true} if results are looked up in and written to a database
     */
    public boolean useDatabase() {
        return this.conn != null;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Looks up a previously stored result for a property pair.
     *
     * <p>Columns are read by position from {@code SELECT *}: 3 = sample size, 4 = r-value,
     * 5 = p-value, 6 = regression coefficient, 7 = y-intercept (presumably matching the
     * column order of {@code correlation_cache} — confirm against the schema script).
     *
     * @param str  value matched against {@code property1}
     * @param str2 value matched against {@code property2}
     * @return the stored correlation, or {@code null} if no database is in use or the pair has
     *         no row yet
     * @throws SQLException if the query fails
     * @throws NoInstanceDataException if the stored sample size is 0
     * @throws NoLinearCorrelationException if the stored regression coefficient is SQL NULL
     */
    public LinearCorrelation lookup(String str, String str2) throws SQLException, NoLinearCorrelationException, NoInstanceDataException {
        if (!useDatabase()) {
            return null;
        }
        java.sql.ResultSet row = SqlUtils.executeCore(this.conn, this.QUERY_LOOKUP, new Object[]{str, str2});
        try {
            if (!row.next()) {
                return null;
            }
            int storedSampleSize = row.getInt(3);
            if (storedSampleSize == 0) {
                throw new NoInstanceDataException(str, str2);
            }
            double rValue = row.getDouble(4);
            double pValue = row.getDouble(5);
            double regressionCoefficient = row.getDouble(6);
            // wasNull refers to the column read last, i.e. the regression coefficient.
            if (row.wasNull()) {
                throw new NoLinearCorrelationException(str, str2, rValue, pValue);
            }
            return new LinearCorrelation(str, str2, storedSampleSize, rValue, pValue, regressionCoefficient, row.getDouble(7));
        } finally {
            // executeCore hands back only the result set, so its statement (if any) has to be
            // released here as well; this runs once per analyzed pair.
            java.sql.Statement owner = row.getStatement();
            try {
                row.close();
            } finally {
                if (owner != null) {
                    owner.close();
                }
            }
        }
    }

    /**
     * Records a pair together with its sample size, correlation coefficient and p-value;
     * the two remaining statistics are passed on as {@code null}.
     *
     * @param str  first property
     * @param str2 second property
     * @param i    sample size
     * @param d    correlation coefficient
     * @param d2   p-value
     * @throws SQLException if the insert fails
     */
    private void write(String str, String str2, int i, double d, double d2) throws SQLException {
        write(str, str2, Integer.valueOf(i), Double.valueOf(d), Double.valueOf(d2), null, null);
    }

    /**
     * Inserts one row into the correlation cache; does nothing when no database is in use.
     *
     * <p>The four statistics are rounded to two decimal places before being stored;
     * {@code null} values are passed through unchanged.
     *
     * @param str  first property
     * @param str2 second property
     * @param num  sample size
     * @param d    correlation coefficient, may be {@code null}
     * @param d2   p-value, may be {@code null}
     * @param d3   regression coefficient, may be {@code null}
     * @param d4   y-intercept, may be {@code null}
     * @throws SQLException if the insert fails
     */
    private void write(String str, String str2, Integer num, Double d, Double d2, Double d3, Double d4) throws SQLException {
        if (!useDatabase()) {
            return;
        }
        Object[] rowValues = {
            str,
            str2,
            num,
            roundToHundredths(d),
            roundToHundredths(d2),
            roundToHundredths(d3),
            roundToHundredths(d4)
        };
        SqlUtils.execute(this.conn, this.QUERY_INSERT, Void.class, rowValues);
    }

    /** Rounds a value to two decimal places; {@code null} stays {@code null}. */
    private static Double roundToHundredths(Double value) {
        if (value == null) {
            return null;
        }
        return Double.valueOf(Math.round(value.doubleValue() * 100.0d) / 100.0d);
    }

    /**
     * Records a pair with only its sample size; all four statistics are passed on as
     * {@code null}.
     *
     * @param str  first property
     * @param str2 second property
     * @param i    sample size
     * @throws SQLException if the insert fails
     */
    private void write(String str, String str2, int i) throws SQLException {
        write(str, str2, Integer.valueOf(i), null, null, null, null);
    }

    /**
     * Records a pair with a sample size of 0 and no statistics.
     *
     * @param str  first property
     * @param str2 second property
     * @throws SQLException if the insert fails
     */
    private void write(String str, String str2) throws SQLException {
        write(str, str2, 0);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Stores a complete correlation result: both properties, the sample size and all four
     * statistics are taken from the given object and handed to the seven-argument
     * {@code write}.
     *
     * @param linearCorrelation the result to store
     * @throws SQLException if the insert fails
     */
    public void write(LinearCorrelation linearCorrelation) throws SQLException {
        Integer storedSampleSize = Integer.valueOf(linearCorrelation.getSampleSize());
        Double rValue = Double.valueOf(linearCorrelation.getCorrelationCoefficient());
        Double pValue = Double.valueOf(linearCorrelation.getPValue());
        Double regressionCoefficient = Double.valueOf(linearCorrelation.getRegressionCoefficient());
        Double yIntercept = Double.valueOf(linearCorrelation.getYIntercept());
        write(
            linearCorrelation.getProperty1(),
            linearCorrelation.getProperty2(),
            storedSampleSize,
            rValue,
            pValue,
            regressionCoefficient,
            yIntercept);
    }

    /**
     * Writes the given instances to a file using Weka's {@link ArffSaver} in batch mode.
     *
     * @param instances data set to save
     * @param file      destination file
     * @throws IOException if the saver cannot write the file
     */
    private void writeArffFile(Instances instances, File file) throws IOException {
        ArffSaver arffSaver = new ArffSaver();
        arffSaver.setInstances(instances);
        arffSaver.setFile(file);
        arffSaver.writeBatch();
    }
}
