package net.sansa_stack.ml.spark.anomalydetection;

import org.apache.jena.graph.Triple;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.MatchError;
import scala.Predef$;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.ObjectRef;

/* compiled from: NumericLiteralAnomalyDetection.scala */
@ScalaSignature(bytes = "\u0006\u0001e3Aa\u0002\u0005\u0001'!A1\u0002\u0001B\u0001B\u0003%!\u0004\u0003\u0005&\u0001\t\u0005\t\u0015!\u0003'\u0011!!\u0004A!A!\u0002\u0013)\u0004\"B\u001d\u0001\t\u0003Q\u0004\"B \u0001\t\u0003\u0001\u0005\"B*\u0001\t\u0003!&A\b(v[\u0016\u0014\u0018n\u0019'ji\u0016\u0014\u0018\r\\!o_6\fG.\u001f#fi\u0016\u001cG/[8o\u0015\tI!\"\u0001\tb]>l\u0017\r\\=eKR,7\r^5p]*\u00111\u0002D\u0001\u0006gB\f'o\u001b\u0006\u0003\u001b9\t!!\u001c7\u000b\u0005=\u0001\u0012aC:b]N\fwl\u001d;bG.T\u0011!E\u0001\u0004]\u0016$8\u0001A\n\u0003\u0001Q\u0001\"!\u0006\r\u000e\u0003YQ\u0011aF\u0001\u0006g\u000e\fG.Y\u0005\u00033Y\u0011a!\u00118z%\u00164\u0007CA\u000e$\u001b\u0005a\"BA\u000f\u001f\u0003\r\u0019\u0018\u000f\u001c\u0006\u0003\u0017}Q!\u0001I\u0011\u0002\r\u0005\u0004\u0018m\u00195f\u0015\u0005\u0011\u0013aA8sO&\u0011A\u0005\b\u0002\r'B\f'o[*fgNLwN\\\u0001\u0010_JLw-\u001b8bY\u0012\u000bG/\u0019*E\tB\u0019qE\u000b\u0017\u000e\u0003!R!!\u000b\u0010\u0002\u0007I$G-\u0003\u0002,Q\t\u0019!\u000b\u0012#\u0011\u00055\u0012T\"\u0001\u0018\u000b\u0005=\u0002\u0014!B4sCBD'BA\u0019 \u0003\u0011QWM\\1\n\u0005Mr#A\u0002+sSBdW-\u0001\u0004d_:4\u0017n\u001a\t\u0003m]j\u0011\u0001C\u0005\u0003q!\u0011A\u0002R5ti\u0006#5i\u001c8gS\u001e\fa\u0001P5oSRtD\u0003B\u001e={y\u0002\"A\u000e\u0001\t\u000b-!\u0001\u0019\u0001\u000e\t\u000b\u0015\"\u0001\u0019\u0001\u0014\t\u000bQ\"\u0001\u0019A\u001b\u0002\u0007I,h\u000eF\u0001B!\t\u0011\u0005K\u0004\u0002D\u001d:\u0011A)\u0014\b\u0003\u000b2s!AR&\u000f\u0005\u001dSU\"\u0001%\u000b\u0005%\u0013\u0012A\u0002\u001fs_>$h(C\u0001#\u0013\t\u0001\u0013%\u0003\u0002\f?%\u0011QDH\u0005\u0003\u001fr\tq\u0001]1dW\u0006<W-\u0003\u0002R%\nIA)\u0019;b\rJ\fW.\u001a\u0006\u0003\u001fr\t!#\u00193e\u00072,8\u000f^3s\u0013\u0012$v\u000eR1uCR\u0019\u0011)V,\t\u000bY3\u0001\u0019A!\u0002\t\u0011\fG/\u0019\u0005\u00061\u001a\u0001\r!Q\u0001\faJ,G-[2uS>t7\u000f")
/* loaded from: input_file:net/sansa_stack/ml/spark/anomalydetection/NumericLiteralAnomalyDetection.class */
/**
 * Anomaly detection pipeline over an RDD of triples, driven by a {@code DistADConfig}.
 *
 * <p>{@link #run()} chains the following steps, each delegated to {@code DistADUtil$}:
 * <ol>
 *   <li>reduce the input with {@code triplesWithNumericLitWithTypeIgnoreEndingWithID}
 *       (going by the name and the log messages: triples carrying numeric literals);</li>
 *   <li>convert that subset to a cached DataFrame with {@code createDFWithConversion};</li>
 *   <li>cluster it with the method selected by {@code config.clusteringMethod()};</li>
 *   <li>apply the algorithm selected by {@code config.anomalyDetectionAlgorithm()}.</li>
 * </ol>
 *
 * <p>Unknown configuration values surface as {@link MatchError}, mirroring the
 * pattern matches of the original Scala source.
 */
public class NumericLiteralAnomalyDetection {
    /** Input triples exactly as handed to the constructor. */
    private final RDD<Triple> originalDataRDD;
    /** Settings steering verbosity, clustering and the anomaly detection algorithm. */
    private final DistADConfig config;

    /**
     * Executes the whole pipeline and logs how long it took.
     *
     * <p>When {@code config.verbose()} is set, intermediate results are logged and
     * shown along the way. The final log line reports the difference of two
     * {@link System#currentTimeMillis()} readings.
     *
     * @return the DataFrame produced by the selected anomaly detection algorithm
     * @throws MatchError if the configured clustering method, clustering type or
     *         anomaly detection algorithm is none of the values the config exposes
     */
    public Dataset<Row> run() {
        final long startedAt = System.currentTimeMillis();

        final RDD<Triple> numericTriples =
                DistADUtil$.MODULE$.triplesWithNumericLitWithTypeIgnoreEndingWithID(this.originalDataRDD);
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info("Original Data RDD Only with numeric Literals:");
            // Only a sample of ten elements is pulled to the driver for logging.
            for (Object sample : (Object[]) numericTriples.take(10)) {
                $anonfun$run$1(sample);
            }
        }

        final Dataset<Row> numericFrame = DistADUtil$.MODULE$.createDFWithConversion(numericTriples).cache();
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info("Original Data DataFrame Only with numeric Literals:");
            numericFrame.show(false);
        }

        final Dataset<Row> clustered = clusterData(numericTriples, numericFrame);
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info("Add clustering result to data:");
            clustered.show(false);
        }

        final Dataset<Row> anomalies = detectAnomalies(clustered);
        DistADLogger$.MODULE$.LOG().info("Operation took: " + (System.currentTimeMillis() - startedAt));
        return anomalies;
    }

    /**
     * Joins {@code dataset} with {@code dataset2} on column {@code "s"}, caches the
     * join, and then drops every column name listed by {@code dataset2} other than
     * {@code "s"} and {@code "prediction"}.
     *
     * @param dataset  the data to be annotated
     * @param dataset2 the clustering output; must share the join column {@code "s"}
     * @return the joined DataFrame after the drops described above
     */
    public Dataset<Row> addClusterIdToData(Dataset<Row> dataset, Dataset<Row> dataset2) {
        Dataset<Row> joined = dataset.join(dataset2, "s").cache();
        for (String column : dataset2.columns()) {
            if (isDroppable(column)) {
                joined = joined.drop(column);
            }
        }
        return joined;
    }

    /**
     * Compiler-generated helper kept for compatibility: logs a single object at
     * info level.
     */
    public static final /* synthetic */ void $anonfun$run$1(Object obj) {
        DistADLogger$.MODULE$.LOG().info(obj);
    }

    /**
     * Compiler-generated helper kept for compatibility: replaces the DataFrame held
     * in {@code objectRef} with a copy lacking column {@code str}, unless that
     * column is {@code "s"} or {@code "prediction"}.
     */
    public static final /* synthetic */ void $anonfun$addClusterIdToData$1(ObjectRef objectRef, String str) {
        if (isDroppable(str)) {
            objectRef.elem = ((Dataset) objectRef.elem).drop(str);
        }
    }

    /**
     * Stores the input RDD and the configuration.
     *
     * @param sparkSession accepted for signature compatibility; not stored or used here
     * @param rdd          the triples to analyse
     * @param distADConfig the configuration consulted by {@link #run()}
     */
    public NumericLiteralAnomalyDetection(SparkSession sparkSession, RDD<Triple> rdd, DistADConfig distADConfig) {
        this.originalDataRDD = rdd;
        this.config = distADConfig;
    }

    /** Dispatches on {@code config.clusteringMethod()}; bisecting k-means is tested first. */
    private Dataset<Row> clusterData(RDD<Triple> numericTriples, Dataset<Row> numericFrame) {
        final String method = this.config.clusteringMethod();
        if (sameValue(this.config.BISECTINGKMEANS(), method)) {
            return clusterWithBisectingKmeans(numericTriples, numericFrame);
        }
        if (sameValue(this.config.MINHASHLSH(), method)) {
            return DistADUtil$.MODULE$.calculateMinHashLSHClustering(numericTriples, this.originalDataRDD, this.config);
        }
        throw new MatchError(method);
    }

    /**
     * Bisecting k-means branch: optionally updates the configured number of clusters
     * from {@code detectNumberOfClusters}, clusters either the reduced or the
     * original triples depending on {@code config.clusteringType()}, and merges the
     * result into {@code numericFrame} via {@link #addClusterIdToData}.
     */
    private Dataset<Row> clusterWithBisectingKmeans(RDD<Triple> numericTriples, Dataset<Row> numericFrame) {
        if (this.config.silhouetteMethod()) {
            // The detected value is written back into the shared config object.
            this.config.numberOfClusters_$eq(
                    DistADUtil$.MODULE$.detectNumberOfClusters(numericFrame, this.config.silhouetteMethodSamplingRate()));
            DistADLogger$.MODULE$.LOG().info(
                    "Number of optimal cluster for the dataset is " + this.config.numberOfClusters());
        }

        final String type = this.config.clusteringType();
        final RDD<Triple> clusteringInput;
        if (sameValue(this.config.PARTIAL(), type)) {
            clusteringInput = numericTriples;
        } else if (sameValue(this.config.FULL(), type)) {
            clusteringInput = this.originalDataRDD;
        } else {
            throw new MatchError(type);
        }

        final Dataset<Row> clusters =
                DistADUtil$.MODULE$.calculateBiSectingKmeanClustering(clusteringInput, this.config.numberOfClusters());
        if (this.config.verbose()) {
            DistADLogger$.MODULE$.LOG().info(
                    "Result of clustering with " + this.config.numberOfClusters() + " clusters:");
            clusters.show(false);
        }
        return addClusterIdToData(numericFrame, clusters);
    }

    /** Dispatches on {@code config.anomalyDetectionAlgorithm()}, testing IQR, then Z-score, then MAD. */
    private Dataset<Row> detectAnomalies(Dataset<Row> clustered) {
        final String algorithm = this.config.anomalyDetectionAlgorithm();
        if (sameValue(this.config.IQR(), algorithm)) {
            return DistADUtil$.MODULE$.iqr(clustered, this.config.verbose(), this.config.anomalyListSize());
        }
        if (sameValue(this.config.ZSCORE(), algorithm)) {
            return DistADUtil$.MODULE$.zscore(clustered, this.config.verbose(), this.config.anomalyListSize());
        }
        if (sameValue(this.config.MAD(), algorithm)) {
            return DistADUtil$.MODULE$.mad(clustered, this.config.verbose(), this.config.anomalyListSize());
        }
        throw new MatchError(algorithm);
    }

    /** Null-safe string equality: two nulls are equal, a null never equals a non-null. */
    private static boolean sameValue(String expected, String actual) {
        if (expected == null) {
            return actual == null;
        }
        return expected.equals(actual);
    }

    /** True for every column name except the join key {@code "s"} and {@code "prediction"}. */
    private static boolean isDroppable(String column) {
        return !column.equals("s") && !column.equals("prediction");
    }
}
