package net.sansa_stack.query.spark.ontop;

import java.io.File;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import net.sansa_stack.query.spark.ontop.VerticalPartitioner;
import net.sansa_stack.rdf.common.partition.core.RdfPartitionComplex;
import net.sansa_stack.rdf.common.partition.core.RdfPartitionerComplex$;
import net.sansa_stack.rdf.spark.io.package;
import net.sansa_stack.rdf.spark.io.package$;
import net.sansa_stack.rdf.spark.partition.core.RdfPartitionUtilsSpark$;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.vocabulary.RDF;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.catalyst.ScalaReflection$;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLOntology;
import scala.MatchError;
import scala.Predef$;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.collection.JavaConverters$;
import scala.collection.immutable.Map;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scopt.OParser;
import scopt.OParser$;
import scopt.OParserBuilder;
import scopt.Read$;

/* compiled from: VerticalPartitioner.scala */
/* loaded from: input_file:net/sansa_stack/query/spark/ontop/VerticalPartitioner$.class */
public final class VerticalPartitioner$ {
    // Singleton instance of this Scala object. The declaration shows `null`, but the private
    // constructor below assigns `this` to it (this is how the decompiler rendered the class).
    public static final VerticalPartitioner$ MODULE$ = null;
    // Absolute path of a local "spark-warehouse" directory; set once in the constructor.
    private final String warehouseLocation;
    // scopt builder used to declare the command-line options.
    private final OParserBuilder<VerticalPartitioner.Config> builder;
    // Command-line parser assembled from `builder` in the constructor.
    private final OParser<BoxedUnit, VerticalPartitioner.Config> parser;
    // When true, createSparkTable counts distinct s/o values to decide on a partition column.
    private final boolean estimatePartitions;
    // Upper bound on the distinct s (or o) count for a column to be used as partition column.
    private final int threshold;

    static {
        // Running the constructor is what populates MODULE$.
        new VerticalPartitioner$();
    }

    /**
     * Returns the warehouse location computed when this object was initialised.
     *
     * @return the stored warehouse location path
     */
    public String warehouseLocation() {
        return warehouseLocation;
    }

    /**
     * Returns the scopt builder used to declare the command-line options.
     *
     * @return the option parser builder held by this object
     */
    public OParserBuilder<VerticalPartitioner.Config> builder() {
        return builder;
    }

    /**
     * Returns the command-line parser assembled during initialisation.
     *
     * @return the scopt parser producing a {@code VerticalPartitioner.Config}
     */
    public OParser<BoxedUnit, VerticalPartitioner.Config> parser() {
        return parser;
    }

    /**
     * Command-line entry point. Parses the arguments with {@code parser()}, starting from a
     * configuration built from the {@code Config} defaults, and passes the parsed configuration
     * to {@code run}. When parsing does not yield a configuration this method simply returns.
     *
     * @param strArr raw command-line arguments
     */
    public void main(String[] strArr) {
        Some parsed = OParser$.MODULE$.parse(
                parser(),
                Predef$.MODULE$.wrapRefArray(strArr),
                new VerticalPartitioner.Config(
                        VerticalPartitioner$Config$.MODULE$.apply$default$1(),
                        VerticalPartitioner$Config$.MODULE$.apply$default$2(),
                        VerticalPartitioner$Config$.MODULE$.apply$default$3(),
                        VerticalPartitioner$Config$.MODULE$.apply$default$4()));
        // Only a successful parse carries a configuration to run with.
        if (parsed instanceof Some) {
            run((VerticalPartitioner.Config) parsed.x());
        }
    }

    /**
     * Loads the input triples, optionally restricts them using an OWL schema, partitions the
     * graph and processes every partition, then prints the number of partitions.
     *
     * NOTE(review): the cached RDD is never unpersisted and the SparkSession is never stopped
     * in this method — confirm whether that is intentional for this command-line tool.
     *
     * @param config parsed command-line configuration (input path, output path, schema path, ...)
     */
    private void run(VerticalPartitioner.Config config) {
        // Hive-enabled session using Kryo with the Jena registrator; the SQL warehouse directory
        // is the configured output path, and cost-based optimisation plus histograms are enabled.
        SparkSession orCreate = SparkSession$.MODULE$.builder().appName("vpartitioner").config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").config("spark.kryo.registrator", String.join(", ", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator")).config("spark.sql.warehouse.dir", config.outputPath().getPath()).config("spark.sql.cbo.enabled", true).config("spark.sql.statistics.histogram.enabled", true).enableHiveSupport().getOrCreate();
        package.RDFReader RDFReader = package$.MODULE$.RDFReader(orCreate);
        // Read the input through the reader's ntriples loader, using the configured input path.
        RDD rdd = (RDD) RDFReader.ntriples(RDFReader.ntriples$default$1()).apply(config.inputPath().getPath());
        if (config.schemaPath() != null) {
            OWLOntology loadOntologyFromOntologyDocument = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(IRI.create(config.schemaPath()));
            // Build a set from the ontology's object properties, its data properties and the
            // rdf:type URI, and filter the RDD with it. The mapping and filter functions are
            // compiled closures not visible here — presumably they map properties to IRI strings
            // and keep triples whose predicate is in the set; verify against the Scala source.
            rdd = rdd.filter(new VerticalPartitioner$$anonfun$run$1(((Iterator) JavaConverters$.MODULE$.asScalaIteratorConverter(loadOntologyFromOntologyDocument.getObjectPropertiesInSignature().iterator()).asScala()).map(new VerticalPartitioner$$anonfun$5()).toSet().$plus$plus(((Iterator) JavaConverters$.MODULE$.asScalaIteratorConverter(loadOntologyFromOntologyDocument.getDataPropertiesInSignature().iterator()).asScala()).map(new VerticalPartitioner$$anonfun$6()).toSet()).$plus$plus(Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new String[]{RDF.type.getURI()})))));
        }
        rdd.cache();
        // Partition the triples with RdfPartitionerComplex; the result maps partitions to data.
        Map partitionGraph = RdfPartitionUtilsSpark$.MODULE$.partitionGraph(rdd, RdfPartitionerComplex$.MODULE$, ClassTag$.MODULE$.apply(RdfPartitionComplex.class));
        // Per-partition work lives in a compiled closure that receives the config and the session
        // (presumably it ends up in createSparkTable — TODO confirm).
        partitionGraph.foreach(new VerticalPartitioner$$anonfun$run$2(config, orCreate));
        Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"num partitions: ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToInteger(partitionGraph.size())})));
    }

    /**
     * Counts the distinct values of the {@code s} and {@code o} columns of the given dataset.
     *
     * @param dataset dataset that has columns named {@code s} and {@code o}
     * @return a pair (distinct {@code s} count, distinct {@code o} count)
     */
    private Tuple2<Object, Object> estimatePartioningColumns(Dataset<Row> dataset) {
        // Each count triggers its own Spark job; the subject count is computed first.
        long distinctSubjects = dataset.select("s", Predef$.MODULE$.wrapRefArray(new String[0])).distinct().count();
        long distinctObjects = dataset.select("o", Predef$.MODULE$.wrapRefArray(new String[0])).distinct().count();
        return new Tuple2.mcJJ.sp(distinctSubjects, distinctObjects);
    }

    /**
     * Tells whether table creation should estimate a partition column from distinct counts.
     *
     * @return the flag set during initialisation
     */
    public boolean estimatePartitions() {
        return estimatePartitions;
    }

    /**
     * Returns the maximum distinct-value count for a column to be chosen as partition column.
     *
     * @return the threshold set during initialisation
     */
    public int threshold() {
        return threshold;
    }

    /**
     * Writes one partition as a Parquet-backed Spark table unless a table with the derived name
     * already exists, and optionally computes column statistics for it.
     *
     * <p>When {@code estimatePartitions()} is true the distinct counts of {@code s} and
     * {@code o} decide the partition column: {@code s} if its count is within
     * {@code threshold()}, otherwise {@code o} if its count is within the threshold and the
     * o/s ratio is below 0.01, otherwise none.
     *
     * @param sparkSession        session used to create the data frame, table and statistics
     * @param rdfPartitionComplex partition descriptor providing the table name and row schema
     * @param rdd                 rows of the partition
     * @param z                   whether to run {@code ANALYZE TABLE} after the table is written
     * @return {@code BoxedUnit.UNIT} when the table already exists or no statistics were
     *         requested, otherwise the result of the {@code ANALYZE TABLE} statement
     */
    public Object net$sansa_stack$query$spark$ontop$VerticalPartitioner$$createSparkTable(SparkSession sparkSession, RdfPartitionComplex rdfPartitionComplex, RDD<Row> rdd, boolean z) {
        String tableName = escapeTablename(createTableName(rdfPartitionComplex));
        Dataset<Row> frame = sparkSession.createDataFrame(rdd, ScalaReflection$.MODULE$.schemaFor(rdfPartitionComplex.layout().schema()).dataType());

        // An existing table is left untouched.
        if (sparkSession.catalog().tableExists(tableName)) {
            return BoxedUnit.UNIT;
        }

        Predef$.MODULE$.println("creating Spark table " + tableName);
        DataFrameWriter writer = frame.write().format("parquet");

        if (estimatePartitions()) {
            Tuple2<Object, Object> counts = estimatePartioningColumns(frame);
            if (counts == null) {
                throw new MatchError(counts);
            }
            long subjectCount = counts._1$mcJ$sp();
            long objectCount = counts._2$mcJ$sp();
            double ratio = objectCount / (double) subjectCount;
            Predef$.MODULE$.println("partition estimates: |s|=" + subjectCount + " |o|=" + objectCount + " ratio o/s=" + ratio);

            // Subjects win when they are few enough; objects need to be few AND rare vs subjects.
            int limit = threshold();
            String partitionColumn = null;
            if (subjectCount <= limit) {
                partitionColumn = "s";
            } else if (objectCount <= limit && ratio < 0.01d) {
                partitionColumn = "o";
            }
            if (partitionColumn != null) {
                writer = writer.partitionBy(Predef$.MODULE$.wrapRefArray(new String[]{partitionColumn}));
            }
        }

        writer.saveAsTable(tableName);

        if (z) {
            Predef$.MODULE$.println("computing statistics for table " + tableName);
            return sparkSession.sql("ANALYZE TABLE `" + tableName + "` COMPUTE STATISTICS FOR COLUMNS s, o");
        }
        return BoxedUnit.UNIT;
    }

    /**
     * Derives an (unescaped) table name from a partition: the predicate, followed by
     * {@code _<datatype tail>} when a non-empty datatype is set (the tail being everything after
     * the last {@code /}), followed by {@code _lang} when a language tag is present.
     *
     * @param rdfPartitionComplex partition descriptor
     * @return the unescaped table name
     */
    private String createTableName(RdfPartitionComplex rdfPartitionComplex) {
        String predicate = rdfPartitionComplex.predicate();
        // The node is created but its result is discarded, exactly as in the original code.
        NodeFactory.createURI(rdfPartitionComplex.predicate());
        String datatype = rdfPartitionComplex.datatype();

        String datatypeSuffix = "";
        if (datatype != null && !datatype.isEmpty()) {
            datatypeSuffix = "_" + datatype.substring(datatype.lastIndexOf("/") + 1);
        }
        String langSuffix = rdfPartitionComplex.langTagPresent() ? "_lang" : "";

        return predicate + datatypeSuffix + langSuffix;
    }

    /**
     * Turns an arbitrary string into a table name: URL-encodes it as UTF-8, lower-cases the
     * result, then replaces {@code %} with {@code P}, {@code .} with {@code C} and {@code -}
     * with {@code dash}.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the generated name does not depend on the JVM
     * default locale (under e.g. a Turkish locale {@code I} would otherwise become a dotless
     * {@code ı}, giving different table names on different machines).
     *
     * @param str the raw table name
     * @return the escaped table name
     */
    private String escapeTablename(String str) {
        return URLEncoder.encode(str, StandardCharsets.UTF_8.toString())
                .toLowerCase(Locale.ROOT)
                // The replacement letters are upper case on purpose: they are inserted after
                // lower-casing and therefore cannot collide with letters of the encoded input.
                .replace('%', 'P')
                .replace('.', 'C')
                .replace("-", "dash");
    }

    /**
     * Initialises the singleton: publishes the instance, computes the warehouse location,
     * creates the scopt builder and assembles the command-line parser, then sets the
     * partition-estimation defaults. The statement order matters: {@code builder} must be
     * assigned before the parser is assembled because the parser definition calls
     * {@code builder()}.
     */
    private VerticalPartitioner$() {
        MODULE$ = this;
        this.warehouseLocation = new File("spark-warehouse").getAbsolutePath();
        this.builder = OParser$.MODULE$.builder();
        // Options: -i/--input (required URI), -o/--output (required URI), -s/--schema (optional
        // URI) and --stats (boolean). Each option's action is a compiled closure not shown here.
        // NOTE(review): both "schema" and "stats" declare the short name 's' — confirm how scopt
        // resolves the duplicate and whether "stats" was meant to have a different short name.
        this.parser = OParser$.MODULE$.sequence(builder().programName("vpartitioner"), Predef$.MODULE$.wrapRefArray(new OParser[]{builder().head(Predef$.MODULE$.wrapRefArray(new String[]{"vertical partitioner", "0.1"})), builder().opt('i', "input", Read$.MODULE$.uriRead()).required().action(new VerticalPartitioner$$anonfun$1()).text("path to input data"), builder().opt('o', "output", Read$.MODULE$.uriRead()).required().action(new VerticalPartitioner$$anonfun$2()).text("path to output directory"), builder().opt('s', "schema", Read$.MODULE$.uriRead()).optional().action(new VerticalPartitioner$$anonfun$3()).text("an optional file containing the OWL schema to process only object and data properties"), builder().opt('s', "stats", Read$.MODULE$.booleanRead()).action(new VerticalPartitioner$$anonfun$4()).text("compute statistics")}));
        // Partition estimation is always on, with a fixed threshold of 1000 distinct values.
        this.estimatePartitions = true;
        this.threshold = 1000;
    }
}
