package net.sansa_stack.examples.spark.ml.Similarity;

import net.sansa_stack.ml.spark.featureExtraction.SmartFeatureExtractor;
import net.sansa_stack.ml.spark.featureExtraction.SparqlFrame;
import net.sansa_stack.rdf.common.io.riot.error.ErrorParseMode$;
import net.sansa_stack.rdf.common.io.riot.error.WarningParseMode$;
import net.sansa_stack.rdf.spark.io.NTripleReader$;
import net.sansa_stack.rdf.spark.model.package$;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.Triple;
import org.apache.jena.sys.JenaSystem;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row$;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import scala.Predef$;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.ArrayOps$;
import scala.collection.StringOps$;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Nil$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ScalaRunTime$;

/* compiled from: SmartFeatureExtractorEvaluation.scala */
/* loaded from: input_file:net/sansa_stack/examples/spark/ml/Similarity/SmartFeatureExtractorEvaluation$.class */
/**
 * Singleton driver for a timed feature-extraction run over an N-Triples file.
 *
 * <p>The run loads the triples, selects a set of "seed" entities, keeps only the triples whose
 * subject is a seed, prints a sample SPARQL query built from the predicates of those triples and
 * finally extracts features with either {@link SparqlFrame} or {@link SmartFeatureExtractor}.
 * After each stage the elapsed wall-clock time in seconds is printed.
 */
public final class SmartFeatureExtractorEvaluation$ {
    public static final SmartFeatureExtractorEvaluation$ MODULE$ = new SmartFeatureExtractorEvaluation$();

    /** Number of positional arguments {@link #main(String[])} requires. */
    private static final int REQUIRED_ARGUMENT_COUNT = 5;

    /**
     * Runs the evaluation.
     *
     * @param strArr positional arguments:
     *     <ol start="0">
     *       <li>path handed to the N-Triples reader</li>
     *       <li>seed mode: {@code "o"} selects subjects of triples whose object equals argument 3,
     *           {@code "s"} runs argument 3 as a SPARQL query and uses its first column, any other
     *           value uses every distinct URI subject/object</li>
     *       <li>extractor mode: {@code "SparqlFrame"} uses {@link SparqlFrame}, any other value
     *           uses {@link SmartFeatureExtractor}</li>
     *       <li>seed object or SPARQL query, depending on argument 1</li>
     *       <li>required but not used by any code in this class</li>
     *     </ol>
     * @throws IllegalArgumentException if fewer than five arguments are supplied
     */
    public void main(String[] strArr) {
        if (strArr == null || strArr.length < REQUIRED_ARGUMENT_COUNT) {
            throw new IllegalArgumentException(
                "expected at least " + REQUIRED_ARGUMENT_COUNT + " arguments: <n-triples input path>"
                    + " <seed mode: o|s|other> <extractor: SparqlFrame|other>"
                    + " <seed object or SPARQL query> <fifth argument (currently unused)>, but got "
                    + (strArr == null ? 0 : strArr.length));
        }
        String inputPath = strArr[0];
        String seedMode = strArr[1];
        String extractorMode = strArr[2];
        String seedSelector = strArr[3];

        long setupStart = System.nanoTime();
        SparkSession spark = SparkSession$.MODULE$.builder()
            .appName("SampleFeatureExtractionPipeline")
            .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .config("spark.kryo.registrator", String.join(", ", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator", "net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify"))
            .getOrCreate();
        try {
            spark.sparkContext().setLogLevel("ERROR");
            JenaSystem.init();
            printElapsed("timeSparkSetup", setupStart);

            // Stage 1: read and cache the input; count() forces the read so it is included in the timing.
            long readStart = System.nanoTime();
            Dataset triples = package$.MODULE$.TripleOperations(NTripleReader$.MODULE$.load(spark, inputPath, ErrorParseMode$.MODULE$.SKIP(), WarningParseMode$.MODULE$.IGNORE(), NTripleReader$.MODULE$.load$default$5(), NTripleReader$.MODULE$.load$default$6())).toDS().cache();
            Predef$.MODULE$.println(BoxesRunTime.boxToLong(triples.count()));
            printElapsed("timeRead", readStart);

            // Stage 2: determine the seed entities.
            long seedStart = System.nanoTime();
            Dataset seeds = gatherSeeds(spark, triples, seedMode, seedSelector);
            Predef$.MODULE$.println(new Tuple2("seeds count: ", BoxesRunTime.boxToLong(seeds.count())));
            printElapsed("gather seeds", seedStart);

            // Stage 3 (untimed): collect the seeds to the driver and keep only triples whose subject is a seed.
            String[] seedUris = (String[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[]) seeds.collect()), row -> {
                return (String) row.getAs(0);
            }, ClassTag$.MODULE$.apply(String.class));
            Dataset seedTriples = package$.MODULE$.TripleOperations(triples.filter(triple -> {
                return BoxesRunTime.boxToBoolean($anonfun$main$7(seedUris, triple));
            }).map(triple -> {
                return triple;
            }, Encoders$.MODULE$.kryo(ClassTag$.MODULE$.apply(Triple.class))).rdd()).toDS().cache();
            ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((Object[]) seedTriples.take(10)), triple -> {
                $anonfun$main$9(triple);
                return BoxedUnit.UNIT;
            });
            package$.MODULE$.TripleOperations(seedTriples.rdd()).toDF().show();

            // NOTE(review): the generated query always embeds argument 3 as an object IRI, even when
            // seed mode "s" supplied a SPARQL query there - confirm this is intended.
            Predef$.MODULE$.println("sample feature extraction SPARQL corresponding to SmartFeatureExtractor function");
            String featureQuery = buildFeatureQuery(seedTriples, seedSelector);
            Predef$.MODULE$.println(featureQuery);

            // Stage 4: feature extraction with the selected extractor.
            Predef$.MODULE$.println("now we do feature extraction");
            long extractStart = System.nanoTime();
            Dataset extracted;
            if ("SparqlFrame".equals(extractorMode)) {
                Predef$.MODULE$.println("DaSimEstimator: Feature Extraction by SparqlFrame");
                extracted = new SparqlFrame().setSparqlQuery(featureQuery).setCollapsByKey(true).setCollapsColumnName("seed").transform(seedTriples);
            } else {
                Predef$.MODULE$.println("DaSimEstimator: Feature Extraction by SmartFeatureExtractor");
                extracted = new SmartFeatureExtractor().setEntityColumnName("s").transform(seedTriples);
            }
            Dataset features = extracted.cache();
            features.show();
            Predef$.MODULE$.println(new Tuple2("feature df count: ", BoxesRunTime.boxToLong(features.count())));
            printElapsed("extract features", extractStart);
        } finally {
            // Stop the session even when a stage fails so it is not left running.
            spark.stop();
        }
    }

    /** Builds the single-column ("seed") data frame of seed entities for the given seed mode. */
    private static Dataset gatherSeeds(SparkSession spark, Dataset triples, String seedMode, String seedSelector) {
        if ("o".equals(seedMode)) {
            Predef$.MODULE$.println("filter by object");
            return package$.MODULE$.TripleOperations(triples.filter(triple -> {
                return BoxesRunTime.boxToBoolean($anonfun$main$1(seedSelector, triple));
            }).rdd()).toDF().select("s", Nil$.MODULE$).withColumnRenamed("s", "seed");
        }
        if ("s".equals(seedMode)) {
            Predef$.MODULE$.println("filter by sparql");
            Dataset queryResult = new SparqlFrame().setSparqlQuery(seedSelector).transform(triples);
            // Whatever the query names its first column, downstream code expects it to be "seed".
            return queryResult.withColumnRenamed(queryResult.columns()[0], "seed");
        }
        // Any other mode: every distinct URI that occurs as a subject or an object.
        return spark.createDataFrame(triples.rdd().flatMap(triple -> {
            return new $colon.colon(triple.getSubject(), new $colon.colon(triple.getObject(), Nil$.MODULE$));
        }, ClassTag$.MODULE$.apply(Node.class)).filter(node -> {
            return BoxesRunTime.boxToBoolean(node.isURI());
        }).map(node -> {
            return node.toString();
        }, ClassTag$.MODULE$.apply(String.class)).distinct().map(uri -> {
            return Row$.MODULE$.apply(ScalaRunTime$.MODULE$.genericWrapArray(new Object[]{uri}));
        }, ClassTag$.MODULE$.apply(Row.class)), new StructType().add(new StructField("seed", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4())));
    }

    /**
     * Builds a SELECT query with one OPTIONAL pattern per distinct predicate of the given triples.
     * Each predicate is paired with a variable derived from its last '/'-separated segment, and the
     * pairs are ordered by variable name.
     */
    private static String buildFeatureQuery(Dataset seedTriples, String seedObject) {
        Object[] predicates = (Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[]) seedTriples.collect()), triple -> {
            return triple.getPredicate().toString();
        }, ClassTag$.MODULE$.apply(String.class));
        Object[] distinctPredicates = (Object[]) ArrayOps$.MODULE$.distinct$extension(Predef$.MODULE$.refArrayOps(predicates));
        Object[] unsortedPairs = (Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(distinctPredicates), predicate -> {
            // Variable name: last path segment with '#' turned into '_' and '.' / '-' removed.
            return new Tuple2(predicate, "?" + ((String) ArrayOps$.MODULE$.last$extension(Predef$.MODULE$.refArrayOps(predicate.split("/")))).replace("#", "_").replace(".", "").replace("-", ""));
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        Tuple2[] predicateVariables = (Tuple2[]) ArrayOps$.MODULE$.sortWith$extension(Predef$.MODULE$.refArrayOps(unsortedPairs), (left, right) -> {
            return BoxesRunTime.boxToBoolean($anonfun$main$12(left, right));
        });

        String selectVariables = Predef$.MODULE$.wrapRefArray((String[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(predicateVariables), pair -> {
            return (String) pair._2();
        }, ClassTag$.MODULE$.apply(String.class))).mkString(" ");
        String optionalPatterns = Predef$.MODULE$.wrapRefArray((String[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(predicateVariables), pair -> {
            return "OPTIONAL {?seed <" + pair._1() + "> " + (String) pair._2() + " .}";
        }, ClassTag$.MODULE$.apply(String.class))).mkString(" \n");

        return "SELECT ?seed " + selectVariables + " \nWHERE {\n?seed ?p <" + seedObject + "> .\n" + optionalPatterns + "}";
    }

    /** Prints the seconds elapsed since {@code startNanos} (a {@link System#nanoTime()} reading). */
    private static void printElapsed(String stage, long startNanos) {
        Predef$.MODULE$.println("\ntime needed " + stage + ": " + (System.nanoTime() - startNanos) / 1.0E9d);
    }

    /** Seed predicate for mode "o": true when the triple's object, rendered as a string, equals {@code str}. */
    public static final /* synthetic */ boolean $anonfun$main$1(String str, Triple triple) {
        return triple.getObject().toString().equals(str);
    }

    /** True when the triple's subject, rendered as a string, is contained in {@code strArr}. */
    public static final /* synthetic */ boolean $anonfun$main$7(String[] strArr, Triple triple) {
        return ArrayOps$.MODULE$.contains$extension(Predef$.MODULE$.refArrayOps(strArr), triple.getSubject().toString());
    }

    /** Prints a single triple. */
    public static final /* synthetic */ void $anonfun$main$9(Triple triple) {
        Predef$.MODULE$.println(triple);
    }

    /** Ordering used for the (predicate, variable) pairs: compares the second elements as strings. */
    public static final /* synthetic */ boolean $anonfun$main$12(Tuple2 tuple2, Tuple2 tuple22) {
        return StringOps$.MODULE$.$less$extension(Predef$.MODULE$.augmentString((String) tuple2._2()), (String) tuple22._2());
    }

    /** Not instantiable from outside; {@link #MODULE$} is the only instance. */
    private SmartFeatureExtractorEvaluation$() {
    }
}
