package net.sansa_stack.examples.spark.ml.Similarity;

import java.util.Calendar;
import net.sansa_stack.ml.spark.featureExtraction.SmartFeatureExtractor;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.JaccardModel;
import net.sansa_stack.ml.spark.utils.FeatureExtractorModel;
import net.sansa_stack.rdf.spark.model.package$;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
import org.apache.jena.riot.Lang;
import org.apache.jena.sys.JenaSystem;
import org.apache.spark.SparkContext;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.HashingTF;
import org.apache.spark.ml.feature.IDF;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row$;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.ArrayType$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DoubleType$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.TimestampType$;
import scala.$less$colon$less$;
import scala.Option;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Tuple2;
import scala.Tuple4;
import scala.collection.ArrayOps$;
import scala.collection.IterableOnceOps;
import scala.collection.immutable.$colon;
import scala.collection.immutable.List;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Seq;
import scala.collection.immutable.Set;
import scala.math.Numeric$DoubleIsFractional$;
import scala.math.Ordering$DeprecatedDoubleOrdering$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.RichDouble$;
import scala.runtime.ScalaRunTime$;

/* compiled from: DaSim.scala */
/* loaded from: input_file:net/sansa_stack/examples/spark/ml/Similarity/DaSim$.class */
public final class DaSim$ {
    /**
     * Shared instance of this class, created eagerly when the class is initialised.
     * Presumably the module instance of the Scala {@code object DaSim} this file was compiled from.
     */
    public static final DaSim$ MODULE$ = new DaSim$();

    /**
     * Runs the DaSim example pipeline end to end on one RDF file.
     *
     * <p>Steps, in order: set up a Spark session, read the knowledge graph, pick seed
     * entities, build candidate pairs with a Jaccard similarity join, extract typed features
     * for the remaining entities, compute one similarity column per feature, combine those
     * columns into {@code overall_similarity_score}, convert the result to triples and write
     * them out as N-Triples.
     *
     * <p>NOTE(review): the output location is a hard-coded, user-specific absolute path
     * (see the last statement); consider making it configurable.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the path of the input file.
     *               A path ending in {@code "nt"} is read as N-Triples, anything else as Turtle.
     * @throws IllegalArgumentException if no input path is supplied
     */
    public void main(String[] strArr) {
        // Fail with a clear message instead of an ArrayIndexOutOfBoundsException on strArr[0].
        if (strArr == null || strArr.length == 0) {
            throw new IllegalArgumentException("missing argument: path to the RDF input file (N-Triples or Turtle)");
        }
        // Verbosity flag handed to the per-feature similarity helper below.
        boolean z = true;
        String str = strArr[0];
        Predef$.MODULE$.println("\nSETUP SPARK SESSION");
        SparkSession orCreate = SparkSession$.MODULE$.builder().appName("SampleFeatureExtractionPipeline").config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").config("spark.kryo.registrator", String.join(", ", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator", "net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify")).getOrCreate();
        orCreate.sparkContext().setLogLevel("ERROR");
        JenaSystem.init();
        Predef$.MODULE$.println("\nREAD IN DATA");
        // The RDF syntax is chosen from the file name suffix only.
        Dataset cache = package$.MODULE$.TripleOperations(str.endsWith("nt") ? ((RDD) net.sansa_stack.rdf.spark.io.package$.MODULE$.RDFReader(orCreate).rdf(Lang.NTRIPLES).apply(str)).persist() : ((RDD) net.sansa_stack.rdf.spark.io.package$.MODULE$.RDFReader(orCreate).rdf(Lang.TURTLE).apply(str)).persist()).toDS().cache();
        // The constant "if (1 != 0)" guards throughout are always-true debug switches.
        if (1 != 0) {
            Predef$.MODULE$.println(new StringBuilder(26).append("\ndata consists of ").append(cache.count()).append(" triples").toString());
        }
        if (1 != 0) {
            ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((Object[]) cache.take(10)), triple -> {
                $anonfun$main$1(triple);
                return BoxedUnit.UNIT;
            });
        }
        Predef$.MODULE$.println("FETCH SEEDS by filter");
        // Seeds are the subjects ("s" column) of the triples accepted by $anonfun$main$2.
        Dataset select = package$.MODULE$.TripleOperations(cache.filter(triple2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$main$2(triple2));
        }).rdd()).toDF().select("s", Nil$.MODULE$);
        if (1 != 0) {
            select.show(false);
        }
        Predef$.MODULE$.println("GATHER CANDIDATE PAIRS");
        Encoder kryo = Encoders$.MODULE$.kryo(ClassTag$.MODULE$.apply(Triple.class));
        // Join seeds with all triples on the subject string; keep only the triples of seed subjects.
        Dataset as = package$.MODULE$.TripleOperations(RDD$.MODULE$.rddToPairRDDFunctions(select.rdd().map(row -> {
            return new Tuple2(row.apply(0).toString(), row.apply(0));
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Any(), Ordering$String$.MODULE$).join(cache.rdd().map(triple3 -> {
            return new Tuple2(triple3.getSubject().toString(), triple3);
        }, ClassTag$.MODULE$.apply(Tuple2.class))).map(tuple2 -> {
            return (Triple) ((Tuple2) tuple2._2())._2();
        }, ClassTag$.MODULE$.apply(Triple.class))).toDS().as(kryo);
        if (1 != 0) {
            Predef$.MODULE$.println(new StringBuilder(29).append("the filtered kg has #triples:").append(as.count()).toString());
        }
        // Feature extraction in mode "os", then count-vectorisation of the extracted features.
        Dataset transform = new FeatureExtractorModel().setMode("os").transform(package$.MODULE$.TripleOperations(as.rdd()).toDF());
        if (1 != 0) {
            transform.show(false);
        }
        Dataset cache2 = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("vectorizedFeatures").fit(transform).transform(transform).select("uri", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"vectorizedFeatures"})).cache();
        if (1 != 0) {
            cache2.show(false);
        }
        JaccardModel inputCol = new JaccardModel().setInputCol("vectorizedFeatures");
        // Self-join on Jaccard similarity, drop self-pairs, and de-duplicate unordered pairs
        // by routing each pair through a Set before rebuilding (uriA, uriB) rows.
        Dataset createDataFrame = orCreate.createDataFrame(inputCol.similarityJoin(cache2, cache2, inputCol.similarityJoin$default$3(), "distCol").filter(functions$.MODULE$.col("uriA").notEqual(functions$.MODULE$.col("uriB"))).rdd().map(row2 -> {
            return (Set) Predef$.MODULE$.Set().apply(ScalaRunTime$.MODULE$.wrapRefArray(new String[]{row2.getString(0), row2.getString(1)}));
        }, ClassTag$.MODULE$.apply(Set.class)).distinct().map(set -> {
            return set.toSeq();
        }, ClassTag$.MODULE$.apply(Seq.class)).map(seq -> {
            return Row$.MODULE$.apply(ScalaRunTime$.MODULE$.genericWrapArray(new Object[]{seq.apply(0), seq.apply(1)}));
        }, ClassTag$.MODULE$.apply(Row.class)), new StructType().add(new StructField("uriA", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4())).add(new StructField("uriB", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4())));
        if (1 != 0) {
            createDataFrame.show(false);
        }
        Predef$.MODULE$.println("PROMISING CANDDATES");
        // Distinct entity ids that occur on either side of a candidate pair.
        Dataset createDataFrame2 = orCreate.createDataFrame(createDataFrame.rdd().flatMap(row3 -> {
            return new $colon.colon(row3.apply(0).toString(), new $colon.colon(row3.apply(1).toString(), Nil$.MODULE$));
        }, ClassTag$.MODULE$.apply(String.class)).distinct().map(str2 -> {
            return Row$.MODULE$.apply(ScalaRunTime$.MODULE$.genericWrapArray(new Object[]{str2}));
        }, ClassTag$.MODULE$.apply(Row.class)), new StructType().add(new StructField("id", StringType$.MODULE$, true, StructField$.MODULE$.apply$default$4())));
        Dataset select2 = createDataFrame.select("uriA", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"uriB"}));
        if (1 != 0) {
            createDataFrame2.show(false);
        }
        Predef$.MODULE$.println("POSTFILTER KG");
        // Same subject-string join as above, now restricted to the candidate entity ids.
        Dataset as2 = package$.MODULE$.TripleOperations(RDD$.MODULE$.rddToPairRDDFunctions(createDataFrame2.rdd().map(row4 -> {
            return new Tuple2(row4.apply(0).toString(), row4.apply(0));
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Any(), Ordering$String$.MODULE$).join(as.rdd().map(triple4 -> {
            return new Tuple2(triple4.getSubject().toString(), triple4);
        }, ClassTag$.MODULE$.apply(Tuple2.class))).map(tuple22 -> {
            return (Triple) ((Tuple2) tuple22._2())._2();
        }, ClassTag$.MODULE$.apply(Triple.class))).toDS().as(kryo);
        if (1 != 0) {
            ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((Object[]) as2.take(20)), triple5 -> {
                $anonfun$main$14(triple5);
                return BoxedUnit.UNIT;
            });
        }
        if (1 != 0) {
            Predef$.MODULE$.println(BoxesRunTime.boxToLong(as2.count()));
        }
        // The result of this conversion is not used; kept as in the original.
        package$.MODULE$.TripleOperations(as2.rdd()).toDF();
        Predef$.MODULE$.println("SMARTFEATUREEXTRACTOR");
        Dataset transform2 = new SmartFeatureExtractor().setEntityColumnName("s").transform(as2);
        if (1 != 0) {
            transform2.show(false);
        }
        if (1 != 0) {
            transform2.printSchema();
        }
        if (1 != 0) {
            Predef$.MODULE$.println("Decision for SimilarityEstimationApproach");
        }
        // Feature columns are all columns of the extracted frame except the first one.
        String[] strArr2 = null;
        if (0 == 0) {
            strArr2 = (String[]) ArrayOps$.MODULE$.drop$extension(Predef$.MODULE$.refArrayOps(transform2.columns()), 1);
        }
        if (1 != 0) {
            ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps(strArr2), str3 -> {
                $anonfun$main$15(str3);
                return BoxedUnit.UNIT;
            });
        }
        // Mutable holder: the per-feature helper replaces its content with a frame that has
        // one more similarity column after each feature.
        ObjectRef create = ObjectRef.create(createDataFrame);
        if (1 != 0) {
            ((Dataset) create.elem).show(false);
        }
        Predef$.MODULE$.println("CALCULATE SIMILARITIES");
        ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps(strArr2), str4 -> {
            $anonfun$main$16(transform2, z, select2, create, str4);
            return BoxedUnit.UNIT;
        });
        Predef$.MODULE$.println("SIMILARITY DATAFRAME");
        ((Dataset) create.elem).show();
        Predef$.MODULE$.println("OPTIONAL SIMILARITY STRECHING");
        ObjectRef create2 = ObjectRef.create(((Dataset) create.elem).cache());
        // NOTE(review): the pair frame starts with two key columns (uriA, uriB); confirm that
        // skipping three columns here (and for strArr3 below) is intended.
        if (1 != 0) {
            ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((String[]) ArrayOps$.MODULE$.drop$extension(Predef$.MODULE$.refArrayOps(((Dataset) create2.elem).columns()), 3)), str5 -> {
                $anonfun$main$18(create2, str5);
                return BoxedUnit.UNIT;
            });
        }
        if (1 != 0) {
            ((Dataset) create2.elem).show(false);
        }
        Predef$.MODULE$.println("WEIGTHED SUM OVER SIMILARITY VALUES");
        Dataset dataset = 1 != 0 ? (Dataset) create2.elem : (Dataset) create.elem;
        String[] strArr3 = (String[]) ArrayOps$.MODULE$.drop$extension(Predef$.MODULE$.refArrayOps(((Dataset) create2.elem).columns()), 3);
        if (1 != 0) {
            Predef$.MODULE$.println("we will weight by four elements: importance, availability, information content, reliability");
        }
        // Importance weights: no explicit weights are configured, so every similarity column
        // gets the uniform weight 1 / (number of columns).
        ObjectRef create3 = ObjectRef.create((Object) null);
        if (((Map) create3.elem) == null) {
            create3.elem = Predef$.MODULE$.wrapRefArray((Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(strArr3), str6 -> {
                return Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(str6), BoxesRunTime.boxToDouble(1.0d / strArr3.length));
            }, ClassTag$.MODULE$.apply(Tuple2.class))).toMap($less$colon$less$.MODULE$.refl());
        }
        if (1 != 0) {
            Predef$.MODULE$.println(new Tuple2("parameter_importance", (Map) create3.elem));
        }
        // Weights must sum to 1 within a tolerance of 0.01, checked in both directions.
        Predef$.MODULE$.assert(Math.abs(1.0d - BoxesRunTime.unboxToDouble(((IterableOnceOps) ((Map) create3.elem).toSeq().map(tuple23 -> {
            return BoxesRunTime.boxToDouble(tuple23._2$mcD$sp());
        })).sum(Numeric$DoubleIsFractional$.MODULE$))) < 0.01d);
        // Reliability weights: uniform as well.
        Map<String, Object> map = null;
        if (0 == 0) {
            map = Predef$.MODULE$.wrapRefArray((Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(strArr3), str7 -> {
                return Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(str7), BoxesRunTime.boxToDouble(1.0d / strArr3.length));
            }, ClassTag$.MODULE$.apply(Tuple2.class))).toMap($less$colon$less$.MODULE$.refl());
        }
        if (1 != 0) {
            Predef$.MODULE$.println(new Tuple2("parameter_reliability", map));
        }
        Predef$.MODULE$.assert(Math.abs(1.0d - BoxesRunTime.unboxToDouble(((IterableOnceOps) map.toSeq().map(tuple24 -> {
            return BoxesRunTime.boxToDouble(tuple24._2$mcD$sp());
        })).sum(Numeric$DoubleIsFractional$.MODULE$))) < 0.01d);
        // Availability weights: uniform as well.
        ObjectRef create4 = ObjectRef.create((Object) null);
        if (((Map) create4.elem) == null) {
            create4.elem = Predef$.MODULE$.wrapRefArray((Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(strArr3), str8 -> {
                return Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(str8), BoxesRunTime.boxToDouble(1.0d / strArr3.length));
            }, ClassTag$.MODULE$.apply(Tuple2.class))).toMap($less$colon$less$.MODULE$.refl());
        }
        if (1 != 0) {
            Predef$.MODULE$.println(new Tuple2("parameter_availability", (Map) create4.elem));
        }
        Predef$.MODULE$.assert(Math.abs(1.0d - BoxesRunTime.unboxToDouble(((IterableOnceOps) ((Map) create4.elem).toSeq().map(tuple25 -> {
            return BoxesRunTime.boxToDouble(tuple25._2$mcD$sp());
        })).sum(Numeric$DoubleIsFractional$.MODULE$))) < 0.01d);
        ObjectRef create5 = ObjectRef.create(dataset);
        // Per-column step using the availability and importance weights; the helper is defined
        // outside this method (presumably it adds the weighted "tmp_<column>" columns summed below).
        ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps(strArr3), str9 -> {
            $anonfun$main$25(create5, create4, create3, str9);
            return BoxedUnit.UNIT;
        });
        // overall_similarity_score = sum of all "tmp_<column>" columns.
        create5.elem = ((Dataset) create5.elem).withColumn("overall_similarity_score", (Column) Predef$.MODULE$.wrapRefArray((Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(strArr3), str10 -> {
            return new StringBuilder(4).append("tmp_").append(str10).toString();
        }, ClassTag$.MODULE$.apply(String.class))), str11 -> {
            return functions$.MODULE$.col(str11);
        }, ClassTag$.MODULE$.apply(Column.class))).reduce((column, column2) -> {
            return column.$plus(column2);
        }));
        // Post-processing of every "tmp_<column>" column by a helper defined outside this
        // method (presumably removes the temporary columns).
        ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(strArr3), str12 -> {
            return new StringBuilder(4).append("tmp_").append(str12).toString();
        }, ClassTag$.MODULE$.apply(String.class))), str13 -> {
            $anonfun$main$30(create5, str13);
            return BoxedUnit.UNIT;
        });
        if (1 != 0) {
            ((Dataset) create5.elem).show(false);
        }
        Predef$.MODULE$.println("SEMANTIFICATION OF RESULTS");
        // Argument order: availability, reliability, importance weights.
        RDD<Triple> dasimSemantification = dasimSemantification((Dataset) create5.elem, new String[]{"uriA", "uriB"}, "overall_similarity_score", strArr3, (Map) create4.elem, map, (Map) create3.elem, "os", "unknown", "unknown");
        if (1 != 0) {
            dasimSemantification.foreach(obj -> {
                $anonfun$main$31(obj);
                return BoxedUnit.UNIT;
            });
        }
        if (1 != 0) {
            Predef$.MODULE$.println(BoxesRunTime.boxToLong(dasimSemantification.count()));
        }
        // Coalesce to a single partition before saving the triples.
        // NOTE(review): hard-coded, machine-specific output directory.
        package$ package_ = package$.MODULE$;
        boolean coalesce$default$2 = dasimSemantification.coalesce$default$2();
        Option coalesce$default$3 = dasimSemantification.coalesce$default$3();
        package_.TripleOperations(dasimSemantification.coalesce(1, coalesce$default$2, coalesce$default$3, dasimSemantification.coalesce$default$4(1, coalesce$default$2, coalesce$default$3))).saveAsNTriplesFile("/Users/carstendraschner/Downloads/tmpDasimOutput2");
    }

    /**
     * Pass-through stub: hands back the given data frame untouched.
     *
     * @param dataset data frame to return
     * @param str     not read by this method
     * @param str2    not read by this method
     * @param str3    not read by this method
     * @return the very same {@code dataset} reference
     */
    public Dataset<Row> doubleSim(Dataset<Row> dataset, String str, String str2, String str3) {
        final Dataset<Row> unchanged = dataset;
        return unchanged;
    }

    /**
     * Default-argument accessor; by its name it supplies the default for the third
     * parameter of {@code doubleSim}.
     *
     * @return the column name {@code "uriA"}
     */
    public String doubleSim$default$3() {
        final String defaultFirstUriColumn = "uriA";
        return defaultFirstUriColumn;
    }

    /**
     * Default-argument accessor; by its name it supplies the default for the fourth
     * parameter of {@code doubleSim}.
     *
     * @return the column name {@code "uriB"}
     */
    public String doubleSim$default$4() {
        final String defaultSecondUriColumn = "uriB";
        return defaultSecondUriColumn;
    }

    /**
     * Turns a scored candidate-pair data frame into RDF triples describing one experiment run.
     *
     * <p>The result is the union of three parts:
     * <ul>
     *   <li>one triple typing the experiment node,</li>
     *   <li>per input row: a prediction node linked to both entities, to the experiment,
     *       to the overall score (xsd:double literal) and to a comment naming the most
     *       relevant feature similarities,</li>
     *   <li>triples describing the hyperparameters of the run.</li>
     * </ul>
     *
     * <p>The experiment id is derived from the hash of the current time string, so two runs
     * produce different node IRIs. The experiment-type and hyperparameter triples are also
     * printed via helpers defined outside this method.
     *
     * @param dataset rows holding the two entity columns, the column
     *                {@code "overall_similarity_score"} and the columns listed in {@code strArr2}
     * @param strArr  names of the two entity columns; index 0 and 1 are read
     * @param str     not read by this method (the score column name is fixed to
     *                {@code "overall_similarity_score"})
     * @param strArr2 names of the per-feature similarity columns; must not be empty because
     *                the largest value is taken from them
     * @param map     weights written as the value of the "availability" hyperparameter
     * @param map2    weights written as the value of the "reliability" hyperparameter
     * @param map3    weights written as the value of the "importance" hyperparameter
     * @param str2    value of the "DistSim feature extraction strategy" hyperparameter
     * @param str3    value of the "initial filter" hyperparameter
     * @param str4    value of the "feature extraction strategy" hyperparameter
     * @return RDD with all generated triples
     */
    public RDD<Triple> dasimSemantification(Dataset<Row> dataset, String[] strArr, String str, String[] strArr2, Map<String, Object> map, Map<String, Object> map2, Map<String, Object> map3, String str2, String str3, String str4) {
        SparkSession orCreate = SparkSession$.MODULE$.builder().getOrCreate();
        // IRI of the property that attaches the "most relevant" comment to a prediction node.
        final String commentPredicateUri = "sansa-stack/sansaVocab/comment";
        Node createURI = NodeFactory.createURI("sansa-stack/sansaVocab/hyperparameter");
        Node createURI2 = NodeFactory.createURI("rdfs/label");
        Node createURI3 = NodeFactory.createURI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
        Node createURI4 = NodeFactory.createURI("sansa-stack/sansaVocab/value");
        Node createURI5 = NodeFactory.createURI("sansa-stack/sansaVocab/element");
        // Experiment id: hash of the current time rendered as a string.
        String num = Integer.toString(Calendar.getInstance().getTime().toString().hashCode());
        Node createURI6 = NodeFactory.createURI(new StringBuilder(1).append("sansa-stack/sansaVocab/experiment").append("/").append(num).toString());
        Node createURI7 = NodeFactory.createURI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
        Node createURI8 = NodeFactory.createURI("sansa-stack/sansaVocab/experiment");
        Node createURI9 = NodeFactory.createURI("sansa-stack/sansaVocab/prediction");
        Node createURI10 = NodeFactory.createURI("sansa-stack/sansaVocab/value");
        SparkContext sparkContext = orCreate.sqlContext().sparkContext();
        // Part 1: (experiment, rdf:type, experiment class).
        RDD parallelize = sparkContext.parallelize(new $colon.colon(Triple.create(createURI6, createURI7, createURI8), Nil$.MODULE$), sparkContext.parallelize$default$2(), ClassTag$.MODULE$.apply(Triple.class));
        parallelize.foreach(obj -> {
            $anonfun$dasimSemantification$1(obj);
            return BoxedUnit.UNIT;
        });
        // One node per hyperparameter, namespaced under the experiment node.
        Node createURI11 = NodeFactory.createURI(new StringBuilder(30).append("sansa-stack/sansaVocab/experiment").append("/").append(num).append("/hyperparameter/initialFilter").toString());
        Node createURI12 = NodeFactory.createURI(new StringBuilder(41).append("sansa-stack/sansaVocab/experiment").append("/").append(num).append("/hyperparameter/distSimFeatureExtraction").toString());
        Node createURI13 = NodeFactory.createURI(new StringBuilder(42).append("sansa-stack/sansaVocab/experiment").append("/").append(num).append("/hyperparameter/featureExtractionStrategy").toString());
        Node createURI14 = NodeFactory.createURI(new StringBuilder(29).append("sansa-stack/sansaVocab/experiment").append("/").append(num).append("/hyperparameter/availability").toString());
        Node createURI15 = NodeFactory.createURI(new StringBuilder(28).append("sansa-stack/sansaVocab/experiment").append("/").append(num).append("/hyperparameter/reliability").toString());
        Node createURI16 = NodeFactory.createURI(new StringBuilder(27).append("sansa-stack/sansaVocab/experiment").append("/").append(num).append("/hyperparameter/importance").toString());
        SparkContext sparkContext2 = orCreate.sqlContext().sparkContext();
        // Part 3: four triples per hyperparameter (link from experiment, type, label, value).
        // Weight maps are rendered as "name: weight" entries joined by "; ".
        RDD parallelize2 = sparkContext2.parallelize((List) scala.package$.MODULE$.List().apply(ScalaRunTime$.MODULE$.wrapRefArray(new Triple[]{Triple.create(createURI6, createURI, createURI11), Triple.create(createURI11, createURI3, createURI), Triple.create(createURI11, createURI2, NodeFactory.createLiteral("initial filter")), Triple.create(createURI11, createURI4, NodeFactory.createLiteral(str3)), Triple.create(createURI6, createURI, createURI12), Triple.create(createURI12, createURI3, createURI), Triple.create(createURI12, createURI2, NodeFactory.createLiteral("DistSim feature extraction strategy")), Triple.create(createURI12, createURI4, NodeFactory.createLiteral(str2)), Triple.create(createURI6, createURI, createURI13), Triple.create(createURI13, createURI3, createURI), Triple.create(createURI13, createURI2, NodeFactory.createLiteral("feature extraction strategy")), Triple.create(createURI13, createURI4, NodeFactory.createLiteral(str4)), Triple.create(createURI6, createURI, createURI14), Triple.create(createURI14, createURI3, createURI), Triple.create(createURI14, createURI2, NodeFactory.createLiteral("availability")), Triple.create(createURI14, createURI4, NodeFactory.createLiteral(((IterableOnceOps) map.map(tuple2 -> {
            return new StringBuilder(2).append((String) tuple2._1()).append(": ").append(Double.toString(tuple2._2$mcD$sp())).toString();
        })).mkString("; "))), Triple.create(createURI6, createURI, createURI15), Triple.create(createURI15, createURI3, createURI), Triple.create(createURI15, createURI2, NodeFactory.createLiteral("reliability")), Triple.create(createURI15, createURI4, NodeFactory.createLiteral(((IterableOnceOps) map2.map(tuple22 -> {
            return new StringBuilder(2).append((String) tuple22._1()).append(": ").append(Double.toString(tuple22._2$mcD$sp())).toString();
        })).mkString("; "))), Triple.create(createURI6, createURI, createURI16), Triple.create(createURI16, createURI3, createURI), Triple.create(createURI16, createURI2, NodeFactory.createLiteral("importance")), Triple.create(createURI16, createURI4, NodeFactory.createLiteral(((IterableOnceOps) map3.map(tuple23 -> {
            return new StringBuilder(2).append((String) tuple23._1()).append(": ").append(Double.toString(tuple23._2$mcD$sp())).toString();
        })).mkString("; ")))})), sparkContext2.parallelize$default$2(), ClassTag$.MODULE$.apply(Triple.class));
        parallelize2.foreach(obj2 -> {
            $anonfun$dasimSemantification$5(obj2);
            return BoxedUnit.UNIT;
        });
        // Part 2: per-row prediction triples, placed between the two constant parts.
        return parallelize.union(dataset.rdd().flatMap(row -> {
            String str5 = (String) row.getAs(strArr[0]);
            String str6 = (String) row.getAs(strArr[1]);
            double unboxToDouble = BoxesRunTime.unboxToDouble(row.getAs("overall_similarity_score"));
            // (feature column name, similarity value) for every feature column of this row.
            Tuple2[] tuple2Arr = (Tuple2[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(strArr2), str7 -> {
                return new Tuple2(str7, row.getAs(str7));
            }, ClassTag$.MODULE$.apply(Tuple2.class));
            // Largest per-feature similarity: last element after sorting ascending by value.
            double _2$mcD$sp = ((Tuple2) ArrayOps$.MODULE$.last$extension(Predef$.MODULE$.refArrayOps((Object[]) ArrayOps$.MODULE$.sortBy$extension(Predef$.MODULE$.refArrayOps(tuple2Arr), tuple24 -> {
                return BoxesRunTime.boxToDouble(tuple24._2$mcD$sp());
            }, Ordering$DeprecatedDoubleOrdering$.MODULE$))))._2$mcD$sp();
            double d = 0.001d;
            // Selection is delegated to a helper defined outside this method; presumably it
            // keeps the features whose value lies within d of the maximum.
            Tuple2[] tuple2Arr2 = (Tuple2[]) ArrayOps$.MODULE$.filter$extension(Predef$.MODULE$.refArrayOps(tuple2Arr), tuple25 -> {
                return BoxesRunTime.boxToBoolean($anonfun$dasimSemantification$9(_2$mcD$sp, d, tuple25));
            });
            // "name: value" entries of the selected features, built once.
            String relevantFeatures = Predef$.MODULE$.wrapRefArray((Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(tuple2Arr2), tuple26 -> {
                return new StringBuilder(2).append((String) tuple26._1()).append(": ").append(Double.toString(tuple26._2$mcD$sp())).toString();
            }, ClassTag$.MODULE$.apply(String.class))).mkString("; ");
            Node[] nodeArr = {NodeFactory.createURI(str5), NodeFactory.createURI(str6)};
            Node createLiteralByValue = NodeFactory.createLiteralByValue(BoxesRunTime.boxToDouble(unboxToDouble), XSDDatatype.XSDdouble);
            // An RDF predicate must be an IRI and the free-text comment belongs in a literal;
            // the node kinds were previously the other way round.
            Node commentPredicate = NodeFactory.createURI(commentPredicateUri);
            Node commentLiteral = NodeFactory.createLiteral(new StringBuilder(14).append("most relevant:").append(relevantFeatures).toString());
            // Prediction node id: experiment id followed by the hash of both entity IRIs concatenated.
            Node createURI18 = NodeFactory.createURI(new StringBuilder(0).append(num).append(Predef$.MODULE$.wrapRefArray((Object[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(nodeArr), node -> {
                return node.getURI();
            }, ClassTag$.MODULE$.apply(String.class))).mkString("").hashCode()).toString());
            // Two "element" triples (one per entity) followed by experiment link, score and comment.
            return Predef$.MODULE$.wrapRefArray((Object[]) ArrayOps$.MODULE$.$plus$plus$extension(Predef$.MODULE$.refArrayOps((Triple[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(nodeArr), node2 -> {
                return Triple.create(createURI18, createURI5, node2);
            }, ClassTag$.MODULE$.apply(Triple.class))), new Triple[]{Triple.create(createURI6, createURI9, createURI18), Triple.create(createURI18, createURI10, createLiteralByValue), Triple.create(createURI18, commentPredicate, commentLiteral)}, ClassTag$.MODULE$.apply(Triple.class)));
        }, ClassTag$.MODULE$.apply(Triple.class))).union(parallelize2);
    }

    /**
     * Lambda body used by {@code main} when previewing the loaded data: prints one triple.
     *
     * @param triple triple to print
     */
    public static final /* synthetic */ void $anonfun$main$1(Triple triple) {
        final Predef$ console = Predef$.MODULE$;
        console.println(triple);
    }

    /**
     * Seed filter used by {@code main}: accepts a triple when the string form of its object
     * is exactly the LinkedMDB film class IRI.
     *
     * @param triple triple to test
     * @return {@code true} if the object's string form equals
     *         {@code "http://data.linkedmdb.org/movie/film"}
     */
    public static final /* synthetic */ boolean $anonfun$main$2(Triple triple) {
        final String objectText = triple.getObject().toString();
        return objectText.equals("http://data.linkedmdb.org/movie/film");
    }

    /**
     * Lambda body used by {@code main} when previewing the post-filtered graph: prints one triple.
     *
     * @param triple triple to print
     */
    public static final /* synthetic */ void $anonfun$main$14(Triple triple) {
        final Predef$ console = Predef$.MODULE$;
        console.println(triple);
    }

    /**
     * Lambda body used by {@code main}: announces the feature column that a similarity will
     * be estimated for.
     *
     * @param str feature column name
     */
    public static final /* synthetic */ void $anonfun$main$15(String str) {
        final String announcement = "similarity estimation for feature: " + str;
        Predef$.MODULE$.println(announcement);
    }

    /**
     * Jaccard similarity of two vectors, computed on the index sets of their sparse
     * representations: |A ∩ B| / |A ∪ B|.
     *
     * @param vector  first vector
     * @param vector2 second vector
     * @return the ratio of shared to combined indices, or {@code 0.0} when both index sets
     *         are empty
     */
    public static final /* synthetic */ double $anonfun$main$17(Vector vector, Vector vector2) {
        Set leftIndices = Predef$.MODULE$.wrapIntArray(vector.toSparse().indices()).toSet();
        Set rightIndices = Predef$.MODULE$.wrapIntArray(vector2.toSparse().indices()).toSet();
        double combinedCount = leftIndices.union(rightIndices).size();
        double sharedCount = leftIndices.intersect(rightIndices).size();
        // Guard the division: an empty union means neither vector has any active index.
        return combinedCount == 0.0d ? 0.0d : sharedCount / combinedCount;
    }

    public static final /* synthetic */ void $anonfun$main$16(Dataset dataset, boolean z, Dataset dataset2, ObjectRef objectRef, String str) {
        Dataset withColumnRenamed;
        Predef$.MODULE$.println(str);
        Dataset select = dataset.select("s", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{str}));
        if (z) {
            Predef$.MODULE$.println("respective to feature type we need to normalize and change data so similarity estimator can operate on it");
        }
        DataType dataType = select.schema().apply(1).dataType();
        DoubleType$ doubleType$ = DoubleType$.MODULE$;
        if (dataType != null ? !dataType.equals(doubleType$) : doubleType$ != null) {
            DataType dataType2 = select.schema().apply(1).dataType();
            TimestampType$ timestampType$ = TimestampType$.MODULE$;
            if (dataType2 != null ? !dataType2.equals(timestampType$) : timestampType$ != null) {
                DataType dataType3 = select.schema().apply(1).dataType();
                ArrayType apply = ArrayType$.MODULE$.apply(StringType$.MODULE$);
                if (dataType3 != null ? !dataType3.equals(apply) : apply != null) {
                    DataType dataType4 = select.schema().apply(1).dataType();
                    StringType$ stringType$ = StringType$.MODULE$;
                    if (dataType4 != null ? !dataType4.equals(stringType$) : stringType$ != null) {
                        Predef$.MODULE$.println("you should never end up here");
                        withColumnRenamed = select.withColumnRenamed(str, "preparedFeature");
                    } else {
                        Dataset transform = new HashingTF().setInputCol("tmp").setOutputCol("rawFeatures").transform(select.groupBy("s", Nil$.MODULE$).agg(functions$.MODULE$.collect_list(str).as("tmp"), Nil$.MODULE$).select("s", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"tmp"})));
                        withColumnRenamed = new IDF().setInputCol("rawFeatures").setOutputCol("preparedFeature").fit(transform).transform(transform).select("s", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"preparedFeature"}));
                    }
                } else {
                    Dataset transform2 = new HashingTF().setInputCol(str).setOutputCol("rawFeatures").transform(select);
                    withColumnRenamed = new IDF().setInputCol("rawFeatures").setOutputCol("preparedFeature").fit(transform2).transform(transform2).select("s", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"preparedFeature"}));
                }
            } else {
                Dataset withColumn = select.withColumn("unixTimestamp", functions$.MODULE$.unix_timestamp(functions$.MODULE$.col(str)).cast("double"));
                Row row = (Row) withColumn.agg(functions$.MODULE$.min("unixTimestamp"), ScalaRunTime$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.max("unixTimestamp")})).head();
                double d = row.getDouble(0);
                double d2 = row.getDouble(1);
                withColumnRenamed = withColumn.withColumn("preparedFeature", functions$.MODULE$.col("unixTimestamp").$minus(functions$.MODULE$.lit(BoxesRunTime.boxToDouble(d))).$div(functions$.MODULE$.lit(BoxesRunTime.boxToDouble(d2 - d != ((double) 0) ? d2 - d : 1.0d))));
            }
        } else {
            Row row2 = (Row) select.agg(functions$.MODULE$.min(str), ScalaRunTime$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.max(str)})).head();
            double d3 = row2.getDouble(0);
            double d4 = row2.getDouble(1);
            withColumnRenamed = select.withColumn("preparedFeature", functions$.MODULE$.col(str).$minus(functions$.MODULE$.lit(BoxesRunTime.boxToDouble(d3))).$div(functions$.MODULE$.lit(BoxesRunTime.boxToDouble(d4 - d3 > ((double) 0) ? d4 - d3 : 1.0d))));
        }
        Dataset dataset3 = withColumnRenamed;
        Dataset drop = dataset2.join(dataset3.select("s", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"preparedFeature"})).withColumnRenamed("preparedFeature", new StringBuilder(14).append(str).append("_prepared_uriA").toString()), dataset2.apply("uriA").$eq$eq$eq(dataset.apply("s")), "inner").drop("s").join(dataset3.select("s", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"preparedFeature"})).withColumnRenamed("preparedFeature", new StringBuilder(14).append(str).append("_prepared_uriB").toString()), dataset2.apply("uriB").$eq$eq$eq(dataset.apply("s")), "left").drop("s");
        if (z) {
            Predef$.MODULE$.println(new StringBuilder(59).append("this is our combined dataframe for the respective feature: ").append(str).toString());
        }
        if (z) {
            Predef$.MODULE$.println("now we execute the respective similarity estimation for this df of candidates");
        }
        if (z) {
            Predef$.MODULE$.println("we need to decide about similarity type by column data type");
        }
        DataType dataType5 = select.schema().apply(1).dataType();
        StringType$ stringType$2 = StringType$.MODULE$;
        if (dataType5 != null ? !dataType5.equals(stringType$2) : stringType$2 != null) {
            DataType dataType6 = select.schema().apply(1).dataType();
            ArrayType apply2 = ArrayType$.MODULE$.apply(StringType$.MODULE$);
            if (dataType6 != null ? !dataType6.equals(apply2) : apply2 != null) {
                DataType dataType7 = select.schema().apply(1).dataType();
                TimestampType$ timestampType$2 = TimestampType$.MODULE$;
                if (dataType7 != null ? !dataType7.equals(timestampType$2) : timestampType$2 != null) {
                    DataType dataType8 = select.schema().apply(1).dataType();
                    DoubleType$ doubleType$2 = DoubleType$.MODULE$;
                    if (dataType8 == null) {
                        if (doubleType$2 != null) {
                            return;
                        }
                    } else if (!dataType8.equals(doubleType$2)) {
                        return;
                    }
                }
                Dataset withColumn2 = drop.withColumn(new StringBuilder(4).append(str).append("_sim").toString(), functions$.MODULE$.lit(BoxesRunTime.boxToDouble(1.0d)).$minus(functions$.MODULE$.abs(functions$.MODULE$.col(new StringBuilder(14).append(str).append("_prepared_uriA").toString()).$minus(functions$.MODULE$.col(new StringBuilder(14).append(str).append("_prepared_uriB").toString())))));
                if (z) {
                    withColumn2.show(false);
                }
                objectRef.elem = ((Dataset) objectRef.elem).join(withColumn2.select("uriA", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"uriB", new StringBuilder(4).append(str).append("_sim").toString()})), new $colon.colon("uriA", new $colon.colon("uriB", Nil$.MODULE$)), "inner");
                return;
            }
        }
        Dataset withColumn3 = drop.withColumn(new StringBuilder(4).append(str).append("_sim").toString(), functions$.MODULE$.udf((vector, vector2) -> {
            return BoxesRunTime.boxToDouble($anonfun$main$17(vector, vector2));
        }, scala.reflect.runtime.package$.MODULE$.universe().TypeTag().Double(), scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(MODULE$.getClass().getClassLoader()), new TypeCreator() { // from class: net.sansa_stack.examples.spark.ml.Similarity.DaSim$$typecreator31$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        }), scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(MODULE$.getClass().getClassLoader()), new TypeCreator() { // from class: net.sansa_stack.examples.spark.ml.Similarity.DaSim$$typecreator32$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        })).apply(ScalaRunTime$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col(new StringBuilder(14).append(str).append("_prepared_uriA").toString()), functions$.MODULE$.col(new StringBuilder(14).append(str).append("_prepared_uriB").toString())})));
        if (z) {
            withColumn3.show(false);
        }
        objectRef.elem = ((Dataset) objectRef.elem).join(withColumn3.select("uriA", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{"uriB", new StringBuilder(4).append(str).append("_sim").toString()})), new $colon.colon("uriA", new $colon.colon("uriB", Nil$.MODULE$)), "inner");
    }

    /**
     * Min-max rescales the column named {@code str} of the dataset held in {@code objectRef}.
     *
     * <p>The column's minimum and maximum are obtained with one aggregation. Each value is
     * then replaced by {@code (value - min) / range}, where {@code range} is
     * {@code max - min}, or {@code 1.0} when that difference is zero (avoids a division by
     * zero for constant columns). The rescaled values are built in a helper column
     * {@code "tmp"}, the original column is dropped, and {@code "tmp"} is renamed back to
     * {@code str}.
     *
     * @param objectRef mutable holder of the dataset; its {@code elem} is replaced by the result
     * @param str name of the column to rescale; its min and max are read via {@code getDouble}
     */
    public static final /* synthetic */ void $anonfun$main$18(ObjectRef objectRef, String str) {
        Dataset current = (Dataset) objectRef.elem;
        Column[] furtherAggregates = new Column[]{functions$.MODULE$.max(str)};
        Row bounds = (Row) current
                .agg(functions$.MODULE$.min(str), ScalaRunTime$.MODULE$.wrapRefArray(furtherAggregates))
                .head();
        double lowest = bounds.getDouble(0);
        double highest = bounds.getDouble(1);

        // A constant column has a zero range; fall back to 1.0 so the division stays defined.
        double range = highest - lowest;
        if (range == ((double) 0)) {
            range = 1.0d;
        }

        Column rescaled = functions$.MODULE$.col(str)
                .$minus(functions$.MODULE$.lit(BoxesRunTime.boxToDouble(lowest)))
                .$div(functions$.MODULE$.lit(BoxesRunTime.boxToDouble(range)));
        objectRef.elem = current
                .withColumn("tmp", rescaled)
                .drop(str)
                .withColumnRenamed("tmp", str);
    }

    /**
     * Adds a weighted helper column {@code "tmp_" + str} to the dataset held in
     * {@code objectRef}.
     *
     * <p>The new column is {@code (col(str) * (w2 + w2 + w3)) / 3.0}, where {@code w2} is the
     * entry for {@code str} in the map held by {@code objectRef2} and {@code w3} is the entry
     * for {@code str} in the map held by {@code objectRef3}.
     *
     * <p>NOTE(review): the map in {@code objectRef2} is looked up twice. Whether a third,
     * distinct weight map was intended cannot be determined from this method - confirm
     * against the original source.
     *
     * @param objectRef mutable holder of the dataset; its {@code elem} is replaced by the result
     * @param objectRef2 holder of a map keyed by column name (used twice in the sum)
     * @param objectRef3 holder of a map keyed by column name (used once in the sum)
     * @param str name of the column to weight
     */
    public static final /* synthetic */ void $anonfun$main$25(ObjectRef objectRef, ObjectRef objectRef2, ObjectRef objectRef3, String str) {
        Dataset current = (Dataset) objectRef.elem;
        String weightedName = "tmp_" + str;
        functions$ fn = functions$.MODULE$;
        objectRef.elem = current.withColumn(
                weightedName,
                fn.col(str)
                        .$times(fn.lit(((Map) objectRef2.elem).apply(str))
                                .$plus(fn.lit(((Map) objectRef2.elem).apply(str)))
                                .$plus(fn.lit(((Map) objectRef3.elem).apply(str))))
                        .$div(BoxesRunTime.boxToDouble(3.0d)));
    }

    /**
     * Removes the column named {@code str} from the dataset held in {@code objectRef}.
     *
     * @param objectRef mutable holder of the dataset; its {@code elem} is replaced by the result
     * @param str name of the column to drop
     */
    public static final /* synthetic */ void $anonfun$main$30(ObjectRef objectRef, String str) {
        Dataset withoutColumn = ((Dataset) objectRef.elem).drop(str);
        objectRef.elem = withoutColumn;
    }

    /**
     * Prints {@code obj} through Scala's {@code Predef.println}.
     *
     * @param obj value to print
     */
    public static final /* synthetic */ void $anonfun$main$31(Object obj) {
        Predef$ predef = Predef$.MODULE$;
        predef.println(obj);
    }

    /**
     * Prints {@code obj} through Scala's {@code Predef.println}.
     *
     * @param obj value to print
     */
    public static final /* synthetic */ void $anonfun$dasimSemantification$1(Object obj) {
        Predef$ predef = Predef$.MODULE$;
        predef.println(obj);
    }

    /**
     * Prints {@code obj} through Scala's {@code Predef.println}.
     *
     * @param obj value to print
     */
    public static final /* synthetic */ void $anonfun$dasimSemantification$5(Object obj) {
        Predef$ predef = Predef$.MODULE$;
        predef.println(obj);
    }

    /**
     * Tests whether the second (double) component of {@code tuple2} lies less than
     * {@code d2} below {@code d}.
     *
     * @param d reference value the tuple's second component is subtracted from
     * @param d2 exclusive upper bound for the difference
     * @param tuple2 pair whose second component is read as a primitive double
     * @return {@code true} when {@code d - tuple2._2 < d2}
     */
    public static final /* synthetic */ boolean $anonfun$dasimSemantification$9(double d, double d2, Tuple2 tuple2) {
        double difference = d - tuple2._2$mcD$sp();
        return difference < d2;
    }

    /**
     * Private no-op constructor: the class cannot be instantiated from outside.
     * NOTE(review): presumably only used to create the {@code MODULE$} instance referenced
     * elsewhere in this class - confirm against the static initializer.
     */
    private DaSim$() {
    }
}
