package net.sansa_stack.ml.spark.anomalydetection;

import com.linkedin.relevance.isolationforest.IsolationForest;
import net.sansa_stack.ml.spark.featureExtraction.FeatureExtractingSparqlGenerator$;
import net.sansa_stack.ml.spark.featureExtraction.SparqlFrame;
import net.sansa_stack.query.spark.package$SPARQLEngine$;
import net.sansa_stack.rdf.spark.model.package$;
import org.apache.jena.graph.Triple;
import org.apache.spark.ml.feature.VectorAssembler;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DoubleType$;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple3;
import scala.collection.ArrayOps$;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.RichInt$;
import scala.runtime.ScalaRunTime$;

/* compiled from: MultiFeatureAnomalyDetection.scala */
@ScalaSignature(bytes = "\u0006\u0005\r4A\u0001C\u0005\u0001)!AA\u0002\u0001B\u0001B\u0003%1\u0004\u0003\u0005'\u0001\t\u0005\t\u0015!\u0003(\u0011!)\u0004A!A!\u0002\u00131\u0004\"\u0002\u001e\u0001\t\u0003Y\u0004\"\u0002!\u0001\t\u0003\t\u0005\"\u0002+\u0001\t\u0003)\u0006\"\u00020\u0001\t\u0003y&\u0001H'vYRLg)Z1ukJ,\u0017I\\8nC2LH)\u001a;fGRLwN\u001c\u0006\u0003\u0015-\t\u0001#\u00198p[\u0006d\u0017\u0010Z3uK\u000e$\u0018n\u001c8\u000b\u00051i\u0011!B:qCJ\\'B\u0001\b\u0010\u0003\tiGN\u0003\u0002\u0011#\u0005Y1/\u00198tC~\u001bH/Y2l\u0015\u0005\u0011\u0012a\u00018fi\u000e\u00011C\u0001\u0001\u0016!\t1\u0012$D\u0001\u0018\u0015\u0005A\u0012!B:dC2\f\u0017B\u0001\u000e\u0018\u0005\u0019\te.\u001f*fMB\u0011A\u0004J\u0007\u0002;)\u0011adH\u0001\u0004gFd'B\u0001\u0007!\u0015\t\t#%\u0001\u0004ba\u0006\u001c\u0007.\u001a\u0006\u0002G\u0005\u0019qN]4\n\u0005\u0015j\"\u0001D*qCJ\\7+Z:tS>t\u0017aD8sS\u001eLg.\u00197ECR\f'\u000b\u0012#\u0011\u0007!ZS&D\u0001*\u0015\tQs$A\u0002sI\u0012L!\u0001L\u0015\u0003\u0007I#E\t\u0005\u0002/g5\tqF\u0003\u00021c\u0005)qM]1qQ*\u0011!\u0007I\u0001\u0005U\u0016t\u0017-\u0003\u00025_\t1AK]5qY\u0016\faaY8oM&<\u0007CA\u001c9\u001b\u0005I\u0011BA\u001d\n\u00051!\u0015n\u001d;B\t\u000e{gNZ5h\u0003\u0019a\u0014N\\5u}Q!A(\u0010 
@!\t9\u0004\u0001C\u0003\r\t\u0001\u00071\u0004C\u0003'\t\u0001\u0007q\u0005C\u00036\t\u0001\u0007a'A\u0002sk:$\u0012A\u0011\t\u0003\u0007Fs!\u0001R(\u000f\u0005\u0015seB\u0001$N\u001d\t9EJ\u0004\u0002I\u00176\t\u0011J\u0003\u0002K'\u00051AH]8pizJ\u0011aI\u0005\u0003C\tJ!\u0001\u0004\u0011\n\u0005yy\u0012B\u0001)\u001e\u0003\u001d\u0001\u0018mY6bO\u0016L!AU*\u0003\u0013\u0011\u000bG/\u0019$sC6,'B\u0001)\u001e\u0003\u0011\u001a\u0017\r\\2vY\u0006$X-\u00118p[\u0006d\u0017.Z:G_JlU\u000f\u001c;ja2,g)Z1ukJ,GC\u0001\"W\u0011\u00159f\u00011\u0001Y\u0003\u0011!\u0017\r^1\u0011\u0007qI6,\u0003\u0002[;\t9A)\u0019;bg\u0016$\bC\u0001\u000f]\u0013\tiVDA\u0002S_^\f!#\u00193e\u00072,8\u000f^3s\u0013\u0012$v\u000eR1uCR\u0019!\tY1\t\u000b];\u0001\u0019\u0001\"\t\u000b\t<\u0001\u0019\u0001\"\u0002\u0017A\u0014X\rZ5di&|gn\u001d")
/* loaded from: input_file:net/sansa_stack/ml/spark/anomalydetection/MultiFeatureAnomalyDetection.class */
/*
 * Multi-feature anomaly detection over an RDD of RDF triples.
 *
 * The pipeline implemented by run() is:
 *   1. keep only the triples selected by DistADUtil.triplesWithNumericLitWithTypeIgnoreEndingWithID,
 *   2. turn them into one feature row per subject ("s"), either by pivoting on the
 *      predicate or through Literal2Feature (generated SPARQL),
 *   3. cluster the data (bisecting k-means or MinHash LSH) and attach the cluster id,
 *   4. fit one isolation forest per cluster and keep the rows flagged as anomalies.
 *
 * NOTE(review): this file is decompiled Scala; the $anonfun$... methods are compiler-generated
 * lambda bodies and must keep their names and signatures.
 */
public class MultiFeatureAnomalyDetection {
    private final SparkSession spark;
    private final RDD<Triple> originalDataRDD;
    private final DistADConfig config;

    /**
     * Runs the complete anomaly detection pipeline.
     *
     * @return the rows for which at least one {@code predictedLabel*} column equals 1, with all
     *         {@code predictedLabel*} columns removed; {@code spark.emptyDataFrame()} when no
     *         {@code predictedLabel*} column was produced at all
     * @throws MatchError if the configured feature extractor, clustering method or clustering
     *         type is not one of the values handled below
     */
    public Dataset<Row> run() {
        Dataset<Row> dataset;
        Dataset<Row> addClusterIdToData;
        Dataset<Row> calculateBiSectingKmeanClustering;
        long currentTimeMillis = System.currentTimeMillis();
        RDD<Triple> triplesWithNumericLitWithTypeIgnoreEndingWithID = DistADUtil$.MODULE$.triplesWithNumericLitWithTypeIgnoreEndingWithID(this.originalDataRDD);
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info("Original Data RDD Only with numeric Literals:");
            ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((Object[]) triplesWithNumericLitWithTypeIgnoreEndingWithID.take(10)), obj -> {
                $anonfun$run$1(obj);
                return BoxedUnit.UNIT;
            });
        }

        // The triple Dataset is only needed (and only built) for Literal2Feature; it stays null
        // for every other extractor and is dereferenced only inside the Literal2Feature branch.
        Dataset<?> dataset2 = null;
        if (this.config.featureExtractor().equals(this.config.LITERAL2FEATURE())) {
            // Result is unused here; presumably the remnant of an implicit encoder in the
            // Scala source - TODO confirm before removing.
            Encoders$.MODULE$.kryo(Tuple3.class);
            dataset2 = package$.MODULE$.TripleOperations(triplesWithNumericLitWithTypeIgnoreEndingWithID).toDS();
        }
        Dataset cache = DistADUtil$.MODULE$.createDFWithConversion(triplesWithNumericLitWithTypeIgnoreEndingWithID).cache();
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info("Original Data DataFrame Only with numeric Literals:");
            cache.show(false);
        }

        // ---- Feature extraction: PIVOT or LITERAL2FEATURE (null-safe string match) ----
        String featureExtractor = this.config.featureExtractor();
        String PIVOT = this.config.PIVOT();
        if (PIVOT != null ? !PIVOT.equals(featureExtractor) : featureExtractor != null) {
            String LITERAL2FEATURE = this.config.LITERAL2FEATURE();
            if (LITERAL2FEATURE != null ? !LITERAL2FEATURE.equals(featureExtractor) : featureExtractor != null) {
                throw new MatchError(featureExtractor);
            }
            DistADLogger$.MODULE$.LOG().info("Starting Literal2Feature. May take time....");
            // Generate a feature-extracting SPARQL query (first tuple element of createSparql)
            // and execute it over the triple Dataset with the Sparqlify engine.
            ObjectRef create = ObjectRef.create(new SparqlFrame().setSparqlQuery((String) FeatureExtractingSparqlGenerator$.MODULE$.createSparql(dataset2, "?s", "?s ?p ?o", 0, this.config.l2fDepth(), this.config.l2fSeedNumber(), 1.0d, FeatureExtractingSparqlGenerator$.MODULE$.createSparql$default$8(), FeatureExtractingSparqlGenerator$.MODULE$.createSparql$default$9(), FeatureExtractingSparqlGenerator$.MODULE$.createSparql$default$10())._1()).setQueryExcecutionEngine(package$SPARQLEngine$.MODULE$.Sparqlify()).setCollapsByKey(false).transform(dataset2));
            // Cast every column except "s" to double.
            ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps(((Dataset) create.elem).columns()), str -> {
                $anonfun$run$3(create, str);
                return BoxedUnit.UNIT;
            });
            dataset = (Dataset) create.elem;
        } else {
            // One row per subject, one column per predicate, first object value per cell.
            Dataset agg = cache.groupBy("s", Nil$.MODULE$).pivot("p").agg(functions$.MODULE$.first("o"), Nil$.MODULE$);
            if (this.config.verbose()) {
                DistADLogger$.MODULE$.LOG().info("Original Data DataFrame Only with numeric Literals and Pivoted:");
                agg.show(false);
            }
            // Replace "." in column names with "_".
            Dataset<Row> df = agg.toDF(Predef$.MODULE$.copyArrayToImmutableIndexedSeq(ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps(agg.columns()), str2 -> {
                return str2.replace(".", "_");
            }, ClassTag$.MODULE$.apply(String.class))));
            if (this.config.verbose()) {
                DistADLogger$.MODULE$.LOG().info("Original Data DataFrame Only with numeric Literals and Pivoted-Columns renamed:");
                df.show(false);
            }
            dataset = df;
        }
        Dataset<Row> dataset3 = dataset;
        if (this.config.verbose()) {
            dataset3.show(false);
        }

        // ---- Clustering: BISECTINGKMEANS or MINHASHLSH (null-safe string match) ----
        String clusteringMethod = this.config.clusteringMethod();
        String BISECTINGKMEANS = this.config.BISECTINGKMEANS();
        if (BISECTINGKMEANS != null ? !BISECTINGKMEANS.equals(clusteringMethod) : clusteringMethod != null) {
            String MINHASHLSH = this.config.MINHASHLSH();
            if (MINHASHLSH != null ? !MINHASHLSH.equals(clusteringMethod) : clusteringMethod != null) {
                throw new MatchError(clusteringMethod);
            }
            Dataset<Row> calculateMinHashLSHClustering = DistADUtil$.MODULE$.calculateMinHashLSHClustering(triplesWithNumericLitWithTypeIgnoreEndingWithID, this.originalDataRDD, this.config);
            if (this.config.verbose()) {
                DistADLogger$.MODULE$.LOG().info("Result of clustering:");
                calculateMinHashLSHClustering.show(false);
            }
            addClusterIdToData = addClusterIdToData(dataset3, calculateMinHashLSHClustering);
        } else {
            if (this.config.silhouetteMethod()) {
                // Overwrites the configured number of clusters with the detected one.
                this.config.numberOfClusters_$eq(DistADUtil$.MODULE$.detectNumberOfClusters(dataset3, this.config.silhouetteMethodSamplingRate()));
                DistADLogger$.MODULE$.LOG().info(new StringBuilder(45).append("Number of optimal cluster for the dataset is ").append(this.config.numberOfClusters()).toString());
            }
            String clusteringType = this.config.clusteringType();
            String PARTIAL = this.config.PARTIAL();
            if (PARTIAL != null ? !PARTIAL.equals(clusteringType) : clusteringType != null) {
                String FULL = this.config.FULL();
                if (FULL != null ? !FULL.equals(clusteringType) : clusteringType != null) {
                    throw new MatchError(clusteringType);
                }
                // FULL: cluster on the unfiltered input triples.
                calculateBiSectingKmeanClustering = DistADUtil$.MODULE$.calculateBiSectingKmeanClustering(this.originalDataRDD, this.config.numberOfClusters());
            } else {
                // PARTIAL: cluster on the filtered triples only.
                calculateBiSectingKmeanClustering = DistADUtil$.MODULE$.calculateBiSectingKmeanClustering(triplesWithNumericLitWithTypeIgnoreEndingWithID, this.config.numberOfClusters());
            }
            Dataset<Row> dataset4 = calculateBiSectingKmeanClustering;
            if (this.config.verbose()) {
                DistADLogger$.MODULE$.LOG().info(new StringBuilder(36).append("Result of clustering with ").append(this.config.numberOfClusters()).append(" clusters:").toString());
                dataset4.show(false);
            }
            addClusterIdToData = addClusterIdToData(dataset3, dataset4);
        }
        Dataset<Row> dataset5 = addClusterIdToData;
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info("Add clustering result to data:");
            dataset5.show(false);
        }

        // ---- Anomaly detection and result selection ----
        Dataset<Row> calculateAnomaliesForMultipleFeature = calculateAnomaliesForMultipleFeature(dataset5);
        // Build the filter expression "predictedLabel_a==1 or predictedLabel_b==1 or ".
        // foreach (not map) because only the side effect on stringBuilder is wanted.
        StringBuilder stringBuilder = new StringBuilder("");
        ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((Object[]) ArrayOps$.MODULE$.filter$extension(Predef$.MODULE$.refArrayOps(calculateAnomaliesForMultipleFeature.columns()), str3 -> {
            return BoxesRunTime.boxToBoolean(str3.startsWith("predictedLabel"));
        })), str4 -> {
            stringBuilder.append(new StringBuilder(7).append(str4).append("==1 or ").toString());
            return BoxedUnit.UNIT;
        });
        if (!stringBuilder.nonEmpty()) {
            // No cluster produced a predictedLabel column, so there is nothing to filter on.
            DistADLogger$.MODULE$.LOG().info(new StringBuilder(16).append("Operation took: ").append(System.currentTimeMillis() - currentTimeMillis).toString());
            DistADLogger$.MODULE$.LOG().info("Total number of anomalies 0");
            return this.spark.emptyDataFrame();
        }
        // length() - 4 strips the trailing " or "; the select keeps every column that is not a
        // predictedLabel column.
        Dataset<Row> select = calculateAnomaliesForMultipleFeature.filter(stringBuilder.substring(0, stringBuilder.length() - 4)).select(Predef$.MODULE$.copyArrayToImmutableIndexedSeq((Column[]) ArrayOps$.MODULE$.map$extension(Predef$.MODULE$.refArrayOps((Object[]) ArrayOps$.MODULE$.filter$extension(Predef$.MODULE$.refArrayOps(calculateAnomaliesForMultipleFeature.columns()), str5 -> {
            return BoxesRunTime.boxToBoolean($anonfun$run$6(str5));
        })), str6 -> {
            return functions$.MODULE$.col(str6);
        }, ClassTag$.MODULE$.apply(Column.class))));
        if (this.config.verbose()) {
            select.show(false);
        }
        DistADLogger$.MODULE$.LOG().info(new StringBuilder(16).append("Operation took: ").append(System.currentTimeMillis() - currentTimeMillis).toString());
        DistADLogger$.MODULE$.LOG().info(new StringBuilder(26).append("Total number of anomalies ").append(select.count()).toString());
        return select;
    }

    /**
     * Fits one isolation forest per cluster and joins each cluster's labels back onto the data.
     *
     * <p>For every cluster id {@code i} in {@code [0, config.numberOfClusters())} the rows with
     * {@code prediction == i} are selected, columns that are entirely null within that cluster
     * are dropped from the feature set, remaining nulls are filled with
     * {@code Double.MAX_VALUE}, and the result is assembled into a {@code features} vector.
     * The model's {@code predictedLabel} is joined back (left outer, on {@code s}) as
     * {@code predictedLabel_i}. Empty clusters are skipped. A cluster whose model fitting or
     * join throws is logged and skipped, so the result may lack that cluster's label column.
     *
     * @param dataset feature rows containing at least the columns {@code s} and
     *                {@code prediction}; every other column is treated as a feature
     * @return {@code dataset} extended with one {@code predictedLabel_i} column per
     *         successfully processed cluster
     */
    public Dataset<Row> calculateAnomaliesForMultipleFeature(Dataset<Row> dataset) {
        // NOTE(review): 'create' only receives column drops from the per-column lambda and is not
        // read after 'create2' is initialised below - looks like a dead store, confirm.
        ObjectRef create = ObjectRef.create(dataset);
        if (this.config.verbose()) {
            dataset.show(false);
        }
        // A single estimator configuration (fixed seed 1, contamination 0.1) is reused for all clusters.
        IsolationForest randomSeed = new IsolationForest().setNumEstimators(this.config.numEstimatorsForIF()).setBootstrap(false).setMaxSamples(this.config.maxSampleForIF()).setFeaturesCol("features").setPredictionCol("predictedLabel").setScoreCol("outlierScore").setContamination(0.1d).setContaminationError(0.01d * 0.1d).setRandomSeed(1L);
        // Accumulates the joined per-cluster label columns.
        ObjectRef create2 = ObjectRef.create((Dataset) create.elem);
        RichInt$.MODULE$.until$extension(Predef$.MODULE$.intWrapper(0), this.config.numberOfClusters()).foreach$mVc$sp(i -> {
            // Rows belonging to cluster i.
            ObjectRef create3 = ObjectRef.create(dataset.filter(functions$.MODULE$.col("prediction").$eq$eq$eq(BoxesRunTime.boxToInteger(i))));
            // Candidate feature columns: everything except "prediction" and "s".
            ObjectRef create4 = ObjectRef.create((String[]) ArrayOps$.MODULE$.filter$extension(Predef$.MODULE$.refArrayOps((Object[]) ArrayOps$.MODULE$.filter$extension(Predef$.MODULE$.refArrayOps(((Dataset) create3.elem).columns()), str -> {
                return BoxesRunTime.boxToBoolean($anonfun$calculateAnomaliesForMultipleFeature$2(str));
            })), str2 -> {
                return BoxesRunTime.boxToBoolean($anonfun$calculateAnomaliesForMultipleFeature$3(str2));
            }));
            // Drop the columns that are null for every row of this cluster.
            ArrayOps$.MODULE$.foreach$extension(Predef$.MODULE$.refArrayOps((String[]) create4.elem), str3 -> {
                $anonfun$calculateAnomaliesForMultipleFeature$4(create3, create, create4, str3);
                return BoxedUnit.UNIT;
            });
            Dataset fill = ((Dataset) create3.elem).na().fill(Double.MAX_VALUE);
            if (this.config.verbose()) {
                fill.show(false);
            }
            Dataset transform = new VectorAssembler().setInputCols((String[]) create4.elem).setOutputCol("features").transform(fill);
            if (this.config.verbose()) {
                transform.show(false);
            }
            if (transform.isEmpty()) {
                return;
            }
            try {
                Dataset transform2 = randomSeed.fit(transform).transform(transform);
                if (this.config.verbose()) {
                    transform2.show(false);
                }
                String sb = new StringBuilder(15).append("predictedLabel_").append(i).toString();
                create2.elem = ((Dataset) create2.elem).join(transform2.withColumnRenamed("predictedLabel", sb).select("s", ScalaRunTime$.MODULE$.wrapRefArray(new String[]{sb})), new $colon.colon("s", Nil$.MODULE$), "leftouter");
                if (this.config.verbose()) {
                    ((Dataset) create2.elem).show(false);
                }
            } catch (Exception e) {
                // Best effort: skip this cluster, but report which one failed and why instead of
                // hiding every failure behind a fixed "maxSamples" message.
                DistADLogger$.MODULE$.LOG().warn("Skipping cluster " + i + ": isolation forest failed; number of selected setMaxSamples for IF may be too much. Cause: " + e);
            }
        });
        return (Dataset) create2.elem;
    }

    /**
     * Attaches the clustering result to the feature rows.
     *
     * @param dataset  feature rows keyed by column {@code s}
     * @param dataset2 clustering result keyed by column {@code s}
     * @return the join of both on {@code s}, cached, with the columns
     *         {@code extractedFeatures}, {@code features}, {@code p} and {@code o} dropped
     */
    public Dataset<Row> addClusterIdToData(Dataset<Row> dataset, Dataset<Row> dataset2) {
        return dataset.join(dataset2, "s").cache().drop("extractedFeatures").drop("features").drop("p").drop("o");
    }

    /** Lambda body used by {@link #run()}: logs one sampled element at info level. */
    public static final /* synthetic */ void $anonfun$run$1(Object obj) {
        DistADLogger$.MODULE$.LOG().info(obj);
    }

    /**
     * Lambda body used by {@link #run()}: casts column {@code str} of the Dataset held in
     * {@code objectRef} to double, leaving the subject column {@code "s"} untouched.
     */
    public static final /* synthetic */ void $anonfun$run$3(ObjectRef objectRef, String str) {
        if (str.equals("s")) {
            return;
        }
        objectRef.elem = ((Dataset) objectRef.elem).withColumn(str, functions$.MODULE$.col(str).cast(DoubleType$.MODULE$));
    }

    /** Lambda body used by {@link #run()}: true for columns that are not {@code predictedLabel*}. */
    public static final /* synthetic */ boolean $anonfun$run$6(String str) {
        return !str.startsWith("predictedLabel");
    }

    /** Column predicate: true for every column except {@code "prediction"}. */
    public static final /* synthetic */ boolean $anonfun$calculateAnomaliesForMultipleFeature$2(String str) {
        return !str.equals("prediction");
    }

    /** Column predicate: true for every column except {@code "s"}. */
    public static final /* synthetic */ boolean $anonfun$calculateAnomaliesForMultipleFeature$3(String str) {
        return !str.equals("s");
    }

    /** Column predicate: true when {@code str2} differs from the column {@code str} being removed. */
    public static final /* synthetic */ boolean $anonfun$calculateAnomaliesForMultipleFeature$5(String str, String str2) {
        return !str2.equals(str);
    }

    /**
     * Lambda body used by {@link #calculateAnomaliesForMultipleFeature(Dataset)}: if column
     * {@code str} holds exactly one distinct value within the cluster and that value is null,
     * the column is dropped from the cluster Dataset ({@code objectRef}), from the Dataset in
     * {@code objectRef2}, and from the feature column list ({@code objectRef3}).
     */
    public static final /* synthetic */ void $anonfun$calculateAnomaliesForMultipleFeature$4(ObjectRef objectRef, ObjectRef objectRef2, ObjectRef objectRef3, String str) {
        // Build the single-column projection once; it is lazy, so nothing runs until
        // count()/first() are invoked.
        Dataset columnValues = ((Dataset) objectRef.elem).select(str, Nil$.MODULE$);
        if (columnValues.distinct().count() == 1 && ((Row) columnValues.first()).isNullAt(0)) {
            objectRef.elem = ((Dataset) objectRef.elem).drop(str);
            objectRef2.elem = ((Dataset) objectRef2.elem).drop(str);
            objectRef3.elem = (String[]) ArrayOps$.MODULE$.filter$extension(Predef$.MODULE$.refArrayOps((String[]) objectRef3.elem), str2 -> {
                return BoxesRunTime.boxToBoolean($anonfun$calculateAnomaliesForMultipleFeature$5(str, str2));
            });
        }
    }

    /**
     * @param sparkSession Spark session, used to create the empty result DataFrame
     * @param rdd          the input triples
     * @param distADConfig pipeline configuration (feature extractor, clustering, isolation
     *                     forest parameters, verbosity)
     */
    public MultiFeatureAnomalyDetection(SparkSession sparkSession, RDD<Triple> rdd, DistADConfig distADConfig) {
        this.spark = sparkSession;
        this.originalDataRDD = rdd;
        this.config = distADConfig;
    }
}
