package net.sansa_stack.ml.spark.anomalydetection;

import java.util.Objects;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import scala.MatchError;
import scala.Predef$;
import scala.collection.immutable.List;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;

/* compiled from: PredicateAnomalyDetection.scala */
@ScalaSignature(bytes = "\u0006\u0001e3Aa\u0002\u0005\u0001'!A1\u0002\u0001B\u0001B\u0003%!\u0004\u0003\u0005&\u0001\t\u0005\t\u0015!\u0003'\u0011!!\u0004A!A!\u0002\u0013)\u0004\"B\u001d\u0001\t\u0003Q\u0004\"B \u0001\t\u0003\u0001\u0005\"B*\u0001\t\u0003!&!\u0007)sK\u0012L7-\u0019;f\u0003:|W.\u00197z\t\u0016$Xm\u0019;j_:T!!\u0003\u0006\u0002!\u0005tw.\\1ms\u0012,G/Z2uS>t'BA\u0006\r\u0003\u0015\u0019\b/\u0019:l\u0015\tia\"\u0001\u0002nY*\u0011q\u0002E\u0001\fg\u0006t7/Y0ti\u0006\u001c7NC\u0001\u0012\u0003\rqW\r^\u0002\u0001'\t\u0001A\u0003\u0005\u0002\u001615\taCC\u0001\u0018\u0003\u0015\u00198-\u00197b\u0013\tIbC\u0001\u0004B]f\u0014VM\u001a\t\u00037\rj\u0011\u0001\b\u0006\u0003;y\t1a]9m\u0015\tYqD\u0003\u0002!C\u00051\u0011\r]1dQ\u0016T\u0011AI\u0001\u0004_J<\u0017B\u0001\u0013\u001d\u00051\u0019\u0006/\u0019:l'\u0016\u001c8/[8o\u0003=y'/[4j]\u0006dG)\u0019;b%\u0012#\u0005cA\u0014+Y5\t\u0001F\u0003\u0002*=\u0005\u0019!\u000f\u001a3\n\u0005-B#a\u0001*E\tB\u0011QFM\u0007\u0002])\u0011q\u0006M\u0001\u0006OJ\f\u0007\u000f\u001b\u0006\u0003c}\tAA[3oC&\u00111G\f\u0002\u0007)JL\u0007\u000f\\3\u0002\r\r|gNZ5h!\t1t'D\u0001\t\u0013\tA\u0004B\u0001\u0007ESN$\u0018\tR\"p]\u001aLw-\u0001\u0004=S:LGO\u0010\u000b\u0005wqjd\b\u0005\u00027\u0001!)1\u0002\u0002a\u00015!)Q\u0005\u0002a\u0001M!)A\u0007\u0002a\u0001k\u0005\u0019!/\u001e8\u0015\u0003\u0005\u0003\"A\u0011)\u000f\u0005\rseB\u0001#N\u001d\t)EJ\u0004\u0002G\u0017:\u0011qIS\u0007\u0002\u0011*\u0011\u0011JE\u0001\u0007yI|w\u000e\u001e \n\u0003\tJ!\u0001I\u0011\n\u0005-y\u0012BA\u000f\u001f\u0013\tyE$A\u0004qC\u000e\\\u0017mZ3\n\u0005E\u0013&!\u0003#bi\u00064%/Y7f\u0015\tyE$\u0001\nbI\u0012\u001cE.^:uKJLE\rV8ECR\fGcA!V/\")aK\u0002a\u0001\u0003\u0006!A-\u0019;b\u0011\u0015Af\u00011\u0001B\u0003-\u0001(/\u001a3jGRLwN\\:")
/* loaded from: input_file:net/sansa_stack/ml/spark/anomalydetection/PredicateAnomalyDetection.class */
public class PredicateAnomalyDetection {
    private final RDD<Triple> originalDataRDD;
    private final DistADConfig config;

    public Dataset<Row> run() {
        Dataset<Row> calculateMinHashLSHClustering;
        Dataset<Row> mad;
        long currentTimeMillis = System.currentTimeMillis();
        Dataset cache = DistADUtil$.MODULE$.createDF(this.originalDataRDD).cache();
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info("Original Data DataFrame:");
            cache.show(false);
        }
        Nil$ nil$ = Nil$.MODULE$;
        Dataset<Row> agg = nil$.isEmpty() ? cache.groupBy("s", Predef$.MODULE$.wrapRefArray(new String[]{"p"})).agg(functions$.MODULE$.count("p").as("o"), Predef$.MODULE$.wrapRefArray(new Column[0])) : cache.filter(row -> {
            return BoxesRunTime.boxToBoolean($anonfun$run$1(nil$, row));
        }).groupBy("s", Predef$.MODULE$.wrapRefArray(new String[]{"p"})).agg(functions$.MODULE$.count("p").as("o"), Predef$.MODULE$.wrapRefArray(new Column[0]));
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info("Result of counting dataframe:");
            agg.show(false);
        }
        RDD<Triple> rdd = agg.map(row2 -> {
            return Triple.create(NodeFactory.createURI(row2.get(0).toString()), NodeFactory.createURI(row2.get(1).toString()), NodeFactory.createLiteral(new StringBuilder(44).append(row2.get(2).toString()).append("^^<http://www.w3.org/2001/XMLSchema#integer>").toString()));
        }, Encoders$.MODULE$.kryo(Triple.class)).rdd();
        String clusteringMethod = this.config.clusteringMethod();
        String BISECTINGKMEANS = this.config.BISECTINGKMEANS();
        if (BISECTINGKMEANS != null ? !BISECTINGKMEANS.equals(clusteringMethod) : clusteringMethod != null) {
            String MINHASHLSH = this.config.MINHASHLSH();
            if (MINHASHLSH != null ? !MINHASHLSH.equals(clusteringMethod) : clusteringMethod != null) {
                throw new MatchError(clusteringMethod);
            }
            calculateMinHashLSHClustering = DistADUtil$.MODULE$.calculateMinHashLSHClustering(rdd, rdd, this.config);
        } else {
            if (this.config.silhouetteMethod()) {
                this.config.numberOfClusters_$eq(DistADUtil$.MODULE$.detectNumberOfClusters(agg, this.config.silhouetteMethodSamplingRate()));
                DistADLogger$.MODULE$.LOG().info(new StringBuilder(45).append("Number of optimal cluster for the dataset is ").append(this.config.numberOfClusters()).toString());
            }
            String clusteringType = this.config.clusteringType();
            String PARTIAL = this.config.PARTIAL();
            if (PARTIAL != null ? PARTIAL.equals(clusteringType) : clusteringType == null) {
                throw new Exception("Partial mode is not available for predicates");
            }
            String FULL = this.config.FULL();
            if (FULL != null ? !FULL.equals(clusteringType) : clusteringType != null) {
                throw new MatchError(clusteringType);
            }
            Dataset<Row> calculateBiSectingKmeanClustering = DistADUtil$.MODULE$.calculateBiSectingKmeanClustering(rdd, this.config.numberOfClusters());
            if (this.config.verbose()) {
                DistADLogger$.MODULE$.LOG().info(new StringBuilder(36).append("Result of clustering with ").append(this.config.numberOfClusters()).append(" clusters:").toString());
                calculateBiSectingKmeanClustering.show(false);
            }
            calculateMinHashLSHClustering = addClusterIdToData(agg, calculateBiSectingKmeanClustering);
        }
        Dataset<Row> dataset = calculateMinHashLSHClustering;
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info("Add clustering result to data:");
            dataset.show(false);
        }
        String anomalyDetectionAlgorithm = this.config.anomalyDetectionAlgorithm();
        String IQR = this.config.IQR();
        if (IQR != null ? !IQR.equals(anomalyDetectionAlgorithm) : anomalyDetectionAlgorithm != null) {
            String ZSCORE = this.config.ZSCORE();
            if (ZSCORE != null ? !ZSCORE.equals(anomalyDetectionAlgorithm) : anomalyDetectionAlgorithm != null) {
                String MAD = this.config.MAD();
                if (MAD != null ? !MAD.equals(anomalyDetectionAlgorithm) : anomalyDetectionAlgorithm != null) {
                    throw new MatchError(anomalyDetectionAlgorithm);
                }
                mad = DistADUtil$.MODULE$.mad(dataset, this.config.verbose(), this.config.anomalyListSize());
            } else {
                mad = DistADUtil$.MODULE$.zscore(dataset, this.config.verbose(), this.config.anomalyListSize());
            }
        } else {
            mad = DistADUtil$.MODULE$.iqr(dataset, this.config.verbose(), this.config.anomalyListSize());
        }
        Dataset<Row> dataset2 = mad;
        DistADLogger$.MODULE$.LOG().info(new StringBuilder(16).append("Operation took: ").append(System.currentTimeMillis() - currentTimeMillis).toString());
        return dataset2;
    }

    public Dataset<Row> addClusterIdToData(Dataset<Row> dataset, Dataset<Row> dataset2) {
        ObjectRef create = ObjectRef.create(dataset.join(dataset2, "s").cache());
        new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(dataset2.columns())).foreach(str -> {
            $anonfun$addClusterIdToData$1(create, str);
            return BoxedUnit.UNIT;
        });
        return (Dataset) create.elem;
    }

    public static final /* synthetic */ boolean $anonfun$run$1(List list, Row row) {
        if (list.nonEmpty()) {
            return list.contains(row.getAs(1));
        }
        return true;
    }

    public static final /* synthetic */ void $anonfun$addClusterIdToData$1(ObjectRef objectRef, String str) {
        if (str.equals("s") || str.equals("prediction")) {
            return;
        }
        objectRef.elem = ((Dataset) objectRef.elem).drop(str);
    }

    public PredicateAnomalyDetection(SparkSession sparkSession, RDD<Triple> rdd, DistADConfig distADConfig) {
        this.originalDataRDD = rdd;
        this.config = distADConfig;
    }
}
