package net.sansa_stack.examples.spark.ml.DistRDF2ML;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.io.UncheckedIOException;
import java.util.Calendar;
import net.sansa_stack.ml.spark.featureExtraction.SmartVectorAssembler;
import net.sansa_stack.ml.spark.featureExtraction.SparqlFrame;
import net.sansa_stack.rdf.common.io.riot.error.ErrorParseMode$;
import net.sansa_stack.rdf.common.io.riot.error.WarningParseMode$;
import net.sansa_stack.rdf.spark.io.NTripleReader$;
import net.sansa_stack.rdf.spark.model.package$;
import org.apache.jena.graph.Triple;
import org.apache.jena.sys.JenaSystem;
import org.apache.spark.ml.classification.RandomForestClassifier;
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
import org.apache.spark.ml.feature.IndexToString;
import org.apache.spark.ml.feature.StringIndexer;
import org.apache.spark.ml.feature.StringIndexerModel;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.functions$;
import scala.Predef$;
import scala.collection.ArrayOps$;
import scala.collection.StringOps$;
import scala.io.Codec$;
import scala.io.Source;
import scala.io.Source$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ScalaRunTime$;

/* compiled from: DistRDF2ML_Evaluation.scala */
/* loaded from: input_file:net/sansa_stack/examples/spark/ml/DistRDF2ML/DistRDF2ML_Evaluation$.class */
public final class DistRDF2ML_Evaluation$ {
    public static final DistRDF2ML_Evaluation$ MODULE$ = new DistRDF2ML_Evaluation$();

    public void main(String[] strArr) {
        String str = strArr[0];
        String str2 = strArr[1];
        String stripPrefix$extension = StringOps$.MODULE$.stripPrefix$extension(Predef$.MODULE$.augmentString(StringOps$.MODULE$.stripSuffix$extension(Predef$.MODULE$.augmentString(str2.split("\\?")[1]), " ")), " ");
        String stripPrefix$extension2 = StringOps$.MODULE$.stripPrefix$extension(Predef$.MODULE$.augmentString(StringOps$.MODULE$.stripSuffix$extension(Predef$.MODULE$.augmentString(str2.split("\\?")[2]), " ")), " ");
        String date = Calendar.getInstance().getTime().toString();
        String str3 = strArr[3];
        String str4 = strArr[2];
        Predef$.MODULE$.println("\nSETUP SPARK SESSION");
        long nanoTime = System.nanoTime();
        SparkSession orCreate = SparkSession$.MODULE$.builder().appName("SampleFeatureExtractionPipeline").config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").config("spark.kryo.registrator", String.join(", ", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator", "net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify")).getOrCreate();
        orCreate.sparkContext().setLogLevel("ERROR");
        JenaSystem.init();
        double nanoTime2 = (System.nanoTime() - nanoTime) / 1.0E9d;
        Predef$.MODULE$.println(new StringBuilder(14).append("\ntime needed: ").append(nanoTime2).toString());
        Predef$.MODULE$.println("spark information");
        Predef$.MODULE$.println(orCreate.sparkContext().getExecutorMemoryStatus());
        ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps(orCreate.sparkContext().getConf().getAll()), obj -> {
            $anonfun$main$1(obj);
            return BoxedUnit.UNIT;
        });
        Predef$.MODULE$.println("\nREAD IN DATA");
        long nanoTime3 = System.nanoTime();
        Dataset persist = package$.MODULE$.TripleOperations(NTripleReader$.MODULE$.load(orCreate, str, ErrorParseMode$.MODULE$.SKIP(), WarningParseMode$.MODULE$.IGNORE(), NTripleReader$.MODULE$.load$default$5(), NTripleReader$.MODULE$.load$default$6())).toDS().persist();
        Predef$.MODULE$.println(new StringBuilder(26).append("\ndata consists of ").append(persist.count()).append(" triples").toString());
        ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((Object[]) persist.take(10)), triple -> {
            $anonfun$main$2(triple);
            return BoxedUnit.UNIT;
        });
        double nanoTime4 = (System.nanoTime() - nanoTime3) / 1.0E9d;
        Predef$.MODULE$.println(new StringBuilder(14).append("\ntime needed: ").append(nanoTime4).toString());
        Predef$.MODULE$.println("\nFEATURE EXTRACTION OVER SPARQL");
        long nanoTime5 = System.nanoTime();
        Dataset persist2 = new SparqlFrame().setSparqlQuery(str2).setCollapsByKey(true).transform(persist).persist();
        long count = persist2.count();
        persist2.show(false);
        Predef$.MODULE$.println(new StringBuilder(25).append("extractedFeaturesDfSize: ").append(count).toString());
        double nanoTime6 = (System.nanoTime() - nanoTime5) / 1.0E9d;
        Predef$.MODULE$.println(new StringBuilder(14).append("\ntime needed: ").append(nanoTime6).toString());
        persist.unpersist();
        Predef$.MODULE$.println("\nSMART VECTOR ASSEMBLER");
        long nanoTime7 = System.nanoTime();
        String str5 = ((String[]) ArrayOps$.MODULE$.filter$extension(Predef$.MODULE$.refArrayOps(persist2.columns()), str6 -> {
            return BoxesRunTime.boxToBoolean($anonfun$main$3(stripPrefix$extension2, str6));
        }))[0];
        Predef$.MODULE$.println(new StringBuilder(49).append("svaEntityColumn ").append(stripPrefix$extension).append(" svaLabelColumn ").append(stripPrefix$extension2).append(" labelColumnName ").append(str5).toString());
        Dataset persist3 = new SmartVectorAssembler().setEntityColumn(stripPrefix$extension).setLabelColumn(str5).setNullReplacement("string", "").setNullReplacement("digit", BoxesRunTime.boxToInteger(-1)).setWord2VecSize(2).setWord2VecMinCount(1).transform(persist2).persist();
        persist3.show(false);
        long count2 = persist3.count();
        Predef$.MODULE$.println(new StringBuilder(22).append("assembled df has ").append(count2).append(" rows").toString());
        double nanoTime8 = (System.nanoTime() - nanoTime7) / 1.0E9d;
        Predef$.MODULE$.println(new StringBuilder(14).append("\ntime needed: ").append(nanoTime8).toString());
        persist2.unpersist();
        Predef$.MODULE$.println("\nAPPLY Common SPARK MLlib Example Algorithm");
        Dataset withColumnRenamed = persist3.select(ScalaRunTime$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("entityID"), functions$.MODULE$.explode(functions$.MODULE$.col("label")), functions$.MODULE$.col("features")})).withColumnRenamed("col", "label");
        withColumnRenamed.show();
        long nanoTime9 = System.nanoTime();
        StringIndexerModel handleInvalid = new StringIndexer().setInputCol("label").setOutputCol("indexedLabel").fit(withColumnRenamed).setHandleInvalid("skip");
        Dataset transform = handleInvalid.transform(withColumnRenamed);
        Dataset transform2 = new RandomForestClassifier().setLabelCol("indexedLabel").setFeaturesCol("features").setNumTrees(10).fit(transform.distinct()).transform(transform);
        new IndexToString().setInputCol("prediction").setOutputCol("predictedLabel").setLabels(handleInvalid.labelsArray()[0]).transform(transform2).select("entityID", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"label", "predictedLabel"})).show(false);
        Predef$.MODULE$.println(new StringBuilder(13).append("Test Error = ").append(1.0d - new MulticlassClassificationEvaluator().setLabelCol("indexedLabel").setPredictionCol("prediction").setMetricName("accuracy").evaluate(transform2)).toString());
        double nanoTime10 = (System.nanoTime() - nanoTime9) / 1.0E9d;
        Predef$.MODULE$.println(new StringBuilder(14).append("\ntime needed: ").append(nanoTime10).toString());
        orCreate.stop();
        String sb = new StringBuilder(15).append(str4).append("DistRDF2ML_").append(date.replace(":", "").replace(" ", "")).append(".txt").toString();
        PrintWriter printWriter = new PrintWriter(new File(sb));
        printWriter.write(new StringBuilder(12).append("datetime: ").append(date).append(" \n").toString());
        printWriter.write(new StringBuilder(13).append("inputPath: ").append(str).append(" \n").toString());
        printWriter.write(new StringBuilder(16).append("sparqlString: ").append(str2).append(" \n").toString());
        printWriter.write(new StringBuilder(23).append("sparqlFrameCollapse: ").append(true).append(" \n").toString());
        printWriter.write(new StringBuilder(19).append("assembledDfSize: ").append(count2).append(" \n").toString());
        printWriter.write(new StringBuilder(19).append("svaEntityColumn: ").append(stripPrefix$extension).append(" \n").toString());
        printWriter.write(new StringBuilder(18).append("svaLabelColumn: ").append(stripPrefix$extension2).append(" \n").toString());
        printWriter.write(new StringBuilder(19).append("svaWord2VecSize: ").append(2).append(" \n").toString());
        printWriter.write(new StringBuilder(23).append("svaWord2VecMinCount: ").append(1).append(" \n").toString());
        printWriter.write(new StringBuilder(12).append("comments: ").append(str3).append(" \n").toString());
        printWriter.write(new StringBuilder(18).append("timeSparkSetup: ").append(nanoTime2).append(" \n").toString());
        printWriter.write(new StringBuilder(14).append("timeReadIn: ").append(nanoTime4).append(" \n").toString());
        printWriter.write(new StringBuilder(19).append("sparqlFrameTime: ").append(nanoTime6).append(" \n").toString());
        printWriter.write(new StringBuilder(28).append("timeSmartVectorAssembler: ").append(nanoTime8).append(" \n").toString());
        printWriter.write(new StringBuilder(18).append("timeSparkMLlib: ").append(nanoTime10).append(" \n").toString());
        printWriter.close();
        Source$.MODULE$.fromFile(sb, Codec$.MODULE$.fallbackSystemCodec()).foreach(obj2 -> {
            $anonfun$main$4(BoxesRunTime.unboxToChar(obj2));
            return BoxedUnit.UNIT;
        });
    }

    public static final /* synthetic */ void $anonfun$main$1(Object obj) {
        Predef$.MODULE$.println(obj);
    }

    public static final /* synthetic */ void $anonfun$main$2(Triple triple) {
        Predef$.MODULE$.println(triple);
    }

    public static final /* synthetic */ boolean $anonfun$main$3(String str, String str2) {
        return str2.contains(str);
    }

    public static final /* synthetic */ void $anonfun$main$4(char c) {
        Predef$.MODULE$.print(BoxesRunTime.boxToCharacter(c));
    }

    private DistRDF2ML_Evaluation$() {
    }
}
