package net.sansa_stack.ml.spark.similarity.run;

import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.BatetModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.BraunBlanquetModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.DiceModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.GenericSimilarityEstimatorModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.JaccardModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.MinHashModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.OchiaiModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.SimpsonModel;
import net.sansa_stack.ml.spark.similarity.similarityEstimationModels.TverskyModel;
import net.sansa_stack.ml.spark.utils.FeatureExtractorModel;
import net.sansa_stack.ml.spark.utils.SimilarityExperimentMetaGraphFactory;
import net.sansa_stack.rdf.spark.io.package;
import net.sansa_stack.rdf.spark.io.package$;
import org.apache.jena.graph.Triple;
import org.apache.jena.riot.Lang;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DataTypes;
import scala.MatchError;
import scala.Option;
import scala.Predef$;
import scala.runtime.BoxesRunTime;

/* compiled from: SimilarityPipeline.scala */
/* loaded from: input_file:net/sansa_stack/ml/spark/similarity/run/SimilarityPipeline$.class */
public final class SimilarityPipeline$ {
    public static SimilarityPipeline$ MODULE$;

    static {
        new SimilarityPipeline$();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v69, types: [net.sansa_stack.ml.spark.similarity.similarityEstimationModels.GenericSimilarityEstimatorModel] */
    /* JADX WARN: Type inference failed for: r0v71, types: [net.sansa_stack.ml.spark.similarity.similarityEstimationModels.GenericSimilarityEstimatorModel] */
    /* JADX WARN: Type inference failed for: r0v73, types: [net.sansa_stack.ml.spark.similarity.similarityEstimationModels.GenericSimilarityEstimatorModel] */
    /* JADX WARN: Type inference failed for: r0v75, types: [net.sansa_stack.ml.spark.similarity.similarityEstimationModels.GenericSimilarityEstimatorModel] */
    /* JADX WARN: Type inference failed for: r0v77, types: [net.sansa_stack.ml.spark.similarity.similarityEstimationModels.GenericSimilarityEstimatorModel] */
    /* JADX WARN: Type inference failed for: r0v79, types: [net.sansa_stack.ml.spark.similarity.similarityEstimationModels.GenericSimilarityEstimatorModel] */
    /* JADX WARN: Type inference failed for: r0v95, types: [net.sansa_stack.ml.spark.similarity.similarityEstimationModels.GenericSimilarityEstimatorModel] */
    public void main(String[] strArr) {
        TverskyModel beta;
        String str = strArr[0];
        String str2 = strArr[1];
        String str3 = strArr[2];
        SparkSession orCreate = SparkSession$.MODULE$.builder().appName("MinMal Semantic Similarity Estimation Calls").master("local[*]").config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").getOrCreate();
        Predef$.MODULE$.println(new StringBuilder(12).append("ReadIn file:").append(str).toString());
        Dataset<?> dataset = (Dataset) package$.MODULE$.RDFDataFrameReader(orCreate.read()).rdf(Lang.NTRIPLES).apply(str);
        Predef$.MODULE$.println(new StringBuilder(27).append("Extract features with mode:").append("at").toString());
        Dataset<Row> transform = new FeatureExtractorModel().setMode("at").transform(dataset);
        Predef$.MODULE$.println("Count Vectorizer");
        Dataset<Row> select = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("vectorizedFeatures").setMinTF(1).setMinDF(1).setVocabSize(100000).fit(transform).transform(transform).filter(functions$.MODULE$.udf(vector -> {
            return BoxesRunTime.boxToBoolean($anonfun$main$1(vector));
        }, DataTypes.BooleanType).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("vectorizedFeatures")}))).select("uri", Predef$.MODULE$.wrapRefArray(new String[]{"vectorizedFeatures"}));
        Predef$.MODULE$.println("EstimatorModel setup");
        if ("Batet".equals(str3)) {
            beta = new BatetModel().setInputCol("vectorizedFeatures");
        } else if ("BraunBlanquet".equals(str3)) {
            beta = new BraunBlanquetModel().setInputCol("vectorizedFeatures");
        } else if ("Dice".equals(str3)) {
            beta = new DiceModel().setInputCol("vectorizedFeatures");
        } else if ("Jaccard".equals(str3)) {
            beta = new JaccardModel().setInputCol("vectorizedFeatures");
        } else if ("MinHash".equals(str3)) {
            beta = new MinHashModel().setInputCol("vectorizedFeatures");
        } else if ("Ochiai".equals(str3)) {
            beta = new OchiaiModel().setInputCol("vectorizedFeatures");
        } else if ("Simpson".equals(str3)) {
            beta = new SimpsonModel().setInputCol("vectorizedFeatures");
        } else {
            if (!"Tversky".equals(str3)) {
                throw new MatchError(str3);
            }
            beta = ((TverskyModel) new TverskyModel().setInputCol("vectorizedFeatures")).setAlpha(1.0d).setBeta(1.0d);
        }
        TverskyModel tverskyModel = beta;
        RDD<Triple> createRdfOutput = new SimilarityExperimentMetaGraphFactory().createRdfOutput(tverskyModel.similarityJoin(select, select, 0.5d, "distCol"), tverskyModel.estimatorName(), tverskyModel.modelType(), tverskyModel.estimatorMeasureType(), dataset.count(), str);
        package$ package_ = package$.MODULE$;
        Option coalesce$default$3 = createRdfOutput.coalesce$default$3();
        package.RDFWriter RDFWriter = package_.RDFWriter(createRdfOutput.coalesce(1, true, coalesce$default$3, createRdfOutput.coalesce$default$4(1, true, coalesce$default$3)));
        RDFWriter.saveAsNTriplesFile(str2, RDFWriter.saveAsNTriplesFile$default$2(), RDFWriter.saveAsNTriplesFile$default$3());
    }

    public static final /* synthetic */ boolean $anonfun$main$1(Vector vector) {
        return vector.numNonzeros() > 0;
    }

    private SimilarityPipeline$() {
        MODULE$ = this;
    }
}
