package net.sansa_stack.ml.spark.outliers.vandalismdetection;

import org.apache.spark.SparkContext;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.classification.DecisionTreeClassifier;
import org.apache.spark.ml.classification.GBTClassifier;
import org.apache.spark.ml.classification.LogisticRegression;
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier;
import org.apache.spark.ml.classification.RandomForestClassifier;
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator;
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
import org.apache.spark.ml.feature.IndexToString;
import org.apache.spark.ml.feature.StringIndexer;
import org.apache.spark.ml.feature.StringIndexerModel;
import org.apache.spark.ml.feature.VectorIndexer;
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import scala.Array$;
import scala.Predef$;
import scala.Serializable;
import scala.Tuple2;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.runtime.DoubleRef;

/* compiled from: Classifiers.scala */
@ScalaSignature(bytes = "\u0006\u0001\r4A!\u0001\u0002\u0001\u001f\tY1\t\\1tg&4\u0017.\u001a:t\u0015\t\u0019A!\u0001\nwC:$\u0017\r\\5t[\u0012,G/Z2uS>t'BA\u0003\u0007\u0003!yW\u000f\u001e7jKJ\u001c(BA\u0004\t\u0003\u0015\u0019\b/\u0019:l\u0015\tI!\"\u0001\u0002nY*\u00111\u0002D\u0001\fg\u0006t7/Y0ti\u0006\u001c7NC\u0001\u000e\u0003\rqW\r^\u0002\u0001'\r\u0001\u0001C\u0006\t\u0003#Qi\u0011A\u0005\u0006\u0002'\u0005)1oY1mC&\u0011QC\u0005\u0002\u0007\u0003:L(+\u001a4\u0011\u0005E9\u0012B\u0001\r\u0013\u00051\u0019VM]5bY&T\u0018M\u00197f\u0011\u0015Q\u0002\u0001\"\u0001\u001c\u0003\u0019a\u0014N\\5u}Q\tA\u0004\u0005\u0002\u001e\u00015\t!\u0001C\u0003 \u0001\u0011\u0005\u0001%A\u000bSC:$w.\u001c$pe\u0016\u001cHo\u00117bgNLg-\u001a:\u0015\t\u0005B3)\u0012\t\u0003E\u0015r!!E\u0012\n\u0005\u0011\u0012\u0012A\u0002)sK\u0012,g-\u0003\u0002'O\t11\u000b\u001e:j]\u001eT!\u0001\n\n\t\u000b%r\u0002\u0019\u0001\u0016\u0002\u0017\u00113u\f\u0016:bS:Lgn\u001a\t\u0003W\u0001s!\u0001L\u001f\u000f\u00055RdB\u0001\u00189\u001d\tySG\u0004\u00021g5\t\u0011G\u0003\u00023\u001d\u00051AH]8pizJ\u0011\u0001N\u0001\u0004_J<\u0017B\u0001\u001c8\u0003\u0019\t\u0007/Y2iK*\tA'\u0003\u0002\bs)\u0011agN\u0005\u0003wq\n1a]9m\u0015\t9\u0011(\u0003\u0002?\u007f\u00059\u0001/Y2lC\u001e,'BA\u001e=\u0013\t\t%IA\u0005ECR\fgI]1nK*\u0011ah\u0010\u0005\u0006\tz\u0001\rAK\u0001\u000b\t\u001a{F+Z:uS:<\u0007\"\u0002$\u001f\u0001\u00049\u0015AA:d!\tA\u0015*D\u0001=\u0013\tQEH\u0001\u0007Ta\u0006\u00148nQ8oi\u0016DH\u000fC\u0003M\u0001\u0011\u0005Q*\u0001\fEK\u000eL7/[8o)J,Wm\u00117bgNLg-[3s)\u0011\tcj\u0014)\t\u000b%Z\u0005\u0019\u0001\u0016\t\u000b\u0011[\u0005\u0019\u0001\u0016\t\u000b\u0019[\u0005\u0019A$\t\u000bI\u0003A\u0011A*\u0002#1{w-[:uS\u000e\u0014Vm\u001a:jg&|g\u000e\u0006\u0003\")V3\u0006\"B\u0015R\u0001\u0004Q\u0003\"\u0002#R\u0001\u0004Q\u0003\"\u0002$R\u0001\u00049\u0005\"\u0002-\u0001\t\u0003I\u0016aE$sC\u0012LWM\u001c;C_>\u001cH/\u001a3Ue\u0016,G\u0003B\u0011[7rCQ!K,A\u0002)BQ\u0001R,A\u0002)BQAR,A\
u0002\u001dCQA\u0018\u0001\u0005\u0002}\u000ba$T;mi&d\u0017-_3s!\u0016\u00148-\u001a9ue>t7\t\\1tg&4\u0017.\u001a:\u0015\t\u0005\u0002\u0017M\u0019\u0005\u0006Su\u0003\rA\u000b\u0005\u0006\tv\u0003\rA\u000b\u0005\u0006\rv\u0003\ra\u0012")
/* loaded from: input_file:net/sansa_stack/ml/spark/outliers/vandalismdetection/Classifiers.class */
/**
 * Trains several Spark ML classifiers on a training dataset, scores a testing dataset and
 * reports evaluation metrics.
 *
 * <p>Every public method follows the same contract:
 * <ul>
 *   <li>{@code dataset} is the training data and {@code dataset2} the testing data; the SQL
 *       issued here reads the columns {@code Rid}, {@code features} and
 *       {@code FinalROLLBACK_REVERTED} from both.</li>
 *   <li>The two inputs are registered as the temporary views {@code DB1} and {@code DB2},
 *       replacing any existing views with those names.</li>
 *   <li>Each metric is printed through Scala's {@code Predef.println} as it is computed.</li>
 *   <li>The return value has the form
 *       {@code ROC=<v>|PR=<v>|accuracy=<v>|Precision=<v>|Recall=<v>}.</li>
 * </ul>
 *
 * <p>NOTE(review): because the view names are fixed, concurrent calls sharing one Spark
 * session would presumably overwrite each other's views - confirm before calling in parallel.
 */
public class Classifiers implements Serializable {
    /** Output column of the label {@link StringIndexer}. */
    private static final String INDEXED_LABEL_COL = "indexedLabel";
    /** Output column of the feature {@link VectorIndexer}. */
    private static final String INDEXED_FEATURES_COL = "indexedFeatures";
    /** Column holding the classifier's predicted class. */
    private static final String PREDICTION_COL = "prediction";
    /** Label column as it appears in the input datasets. */
    private static final String RAW_LABEL_COL = "FinalROLLBACK_REVERTED";
    /** Alias given to the label column by the queries that rename it. */
    private static final String ALIASED_LABEL_COL = "label";

    /**
     * Trains a random forest (gini impurity, max depth 3, 20 trees, seed 5043), shows the
     * scored testing data and reports the metrics.
     *
     * @param dataset training data
     * @param dataset2 testing data
     * @param sparkContext context used to create the {@link SQLContext} that runs the queries
     * @return the pipe-delimited metric summary described on the class
     */
    public String RandomForestClassifer(Dataset<Row> dataset, Dataset<Row> dataset2, SparkContext sparkContext) {
        SQLContext sqlContext = registerInputViews(dataset, dataset2, sparkContext);
        Dataset<Row> training = sqlContext.sql("select Rid, features,FinalROLLBACK_REVERTED  from DB1");
        Dataset<Row> testing = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED  from DB2");
        PipelineStage forest = new RandomForestClassifier()
                .setImpurity("gini")
                .setMaxDepth(3)
                .setNumTrees(20)
                .setFeatureSubsetStrategy("auto")
                .setSeed(5043L)
                .setLabelCol(INDEXED_LABEL_COL)
                .setFeaturesCol(INDEXED_FEATURES_COL);
        Dataset<Row> predictions = trainAndPredict(training, testing, RAW_LABEL_COL, forest);
        predictions.show();
        return reportEvaluatorMetrics(predictions);
    }

    /**
     * Trains a decision tree with Spark's default parameters and reports the metrics.
     *
     * @param dataset training data
     * @param dataset2 testing data
     * @param sparkContext context used to create the {@link SQLContext} that runs the queries
     * @return the pipe-delimited metric summary described on the class
     */
    public String DecisionTreeClassifier(Dataset<Row> dataset, Dataset<Row> dataset2, SparkContext sparkContext) {
        SQLContext sqlContext = registerInputViews(dataset, dataset2, sparkContext);
        Dataset<Row> training = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED  from DB1");
        Dataset<Row> testing = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED  from DB2");
        PipelineStage tree = new DecisionTreeClassifier()
                .setLabelCol(INDEXED_LABEL_COL)
                .setFeaturesCol(INDEXED_FEATURES_COL);
        Dataset<Row> predictions = trainAndPredict(training, testing, RAW_LABEL_COL, tree);
        return reportEvaluatorMetrics(predictions);
    }

    /**
     * Trains a multinomial logistic regression (10 iterations, regParam 0.3, elasticNetParam
     * 0.8), shows the scored testing data and reports the metrics.
     *
     * @param dataset training data
     * @param dataset2 testing data
     * @param sparkContext context used to create the {@link SQLContext} that runs the queries
     * @return the pipe-delimited metric summary described on the class
     */
    public String LogisticRegrision(Dataset<Row> dataset, Dataset<Row> dataset2, SparkContext sparkContext) {
        SQLContext sqlContext = registerInputViews(dataset, dataset2, sparkContext);
        Dataset<Row> training = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED as label from DB1");
        Dataset<Row> testing = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED as label from DB2");
        // Train on the indexed label: the evaluators and the IndexToString stage both interpret
        // predictions as label indices, so fitting on the raw "label" column (the previous
        // behaviour) could score predictions against a different label encoding.
        PipelineStage regression = new LogisticRegression()
                .setMaxIter(10)
                .setRegParam(0.3d)
                .setElasticNetParam(0.8d)
                .setFamily("multinomial")
                .setLabelCol(INDEXED_LABEL_COL);
        Dataset<Row> predictions = trainAndPredict(training, testing, ALIASED_LABEL_COL, regression);
        predictions.show();
        return reportEvaluatorMetrics(predictions);
    }

    /**
     * Trains a gradient-boosted tree classifier with Spark's default parameters and reports
     * the metrics. ROC and PR areas come from {@link BinaryClassificationMetrics} over the
     * {@code prediction} and {@code FinalROLLBACK_REVERTED} columns.
     *
     * @param dataset training data
     * @param dataset2 testing data
     * @param sparkContext context used to create the {@link SQLContext} that runs the queries
     * @return the pipe-delimited metric summary described on the class
     */
    public String GradientBoostedTree(Dataset<Row> dataset, Dataset<Row> dataset2, SparkContext sparkContext) {
        SQLContext sqlContext = registerInputViews(dataset, dataset2, sparkContext);
        Dataset<Row> training = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED  from DB1").cache();
        Dataset<Row> testing = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED  from DB2").cache();
        try {
            PipelineStage boostedTrees = new GBTClassifier()
                    .setLabelCol(INDEXED_LABEL_COL)
                    .setFeaturesCol(INDEXED_FEATURES_COL);
            Dataset<Row> predictions = trainAndPredict(training, testing, RAW_LABEL_COL, boostedTrees);

            // NOTE(review): the curve metrics pair predictions with the raw label column, not
            // indexedLabel; the row conversion lives in Classifiers$$anonfun$1 (not visible
            // here) - confirm both use the same label encoding.
            // Raw types on purpose: the Scala closure and ClassTag erase the element types.
            @SuppressWarnings("rawtypes")
            Dataset predictionAndLabel = predictions.select(PREDICTION_COL, RAW_LABEL_COL);
            @SuppressWarnings("unchecked")
            BinaryClassificationMetrics curveMetrics = new BinaryClassificationMetrics(
                    predictionAndLabel.rdd().map(new Classifiers$$anonfun$1(this), ClassTag$.MODULE$.apply(Tuple2.class)));

            // Each area is computed once and reused for both the console line and the summary.
            double areaUnderRoc = curveMetrics.areaUnderROC();
            Predef$.MODULE$.println("Area under ROC = " + areaUnderRoc);
            double areaUnderPr = curveMetrics.areaUnderPR();
            Predef$.MODULE$.println("Area under PR = " + areaUnderPr);

            MulticlassClassificationEvaluator multiclass = new MulticlassClassificationEvaluator()
                    .setLabelCol(INDEXED_LABEL_COL)
                    .setPredictionCol(PREDICTION_COL);
            double accuracy = reportMetric("accuracy", predictions, multiclass);
            double precision = reportMetric("weightedPrecision", predictions, multiclass);
            double recall = reportMetric("weightedRecall", predictions, multiclass);
            return formatSummary(areaUnderRoc, areaUnderPr, accuracy, precision, recall);
        } finally {
            // Release the cached inputs; every metric above is already materialised as a double.
            training.unpersist();
            testing.unpersist();
        }
    }

    /**
     * Trains a multilayer perceptron (layers 100-5-4-2, block size 128, seed 1234, 100
     * iterations) and reports the metrics, all computed over the {@code prediction} and
     * {@code label} columns.
     *
     * <p>NOTE(review): unlike the tree-based methods, this classifier does not set its label
     * or features column to the indexed ones, while the IndexToString stage still decodes
     * predictions through the indexer's labels - confirm this is intended.
     *
     * @param dataset training data
     * @param dataset2 testing data
     * @param sparkContext context used to create the {@link SQLContext} that runs the queries
     * @return the pipe-delimited metric summary described on the class
     */
    public String MultilayerPerceptronClassifier(Dataset<Row> dataset, Dataset<Row> dataset2, SparkContext sparkContext) {
        SQLContext sqlContext = registerInputViews(dataset, dataset2, sparkContext);
        Dataset<Row> training = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED as label from DB1");
        Dataset<Row> testing = sqlContext.sql("select Rid, features, FinalROLLBACK_REVERTED as label from DB2");
        PipelineStage perceptron = new MultilayerPerceptronClassifier()
                .setLayers(new int[]{100, 5, 4, 2})
                .setBlockSize(128)
                .setSeed(1234L)
                .setMaxIter(100);
        Dataset<Row> predictions = trainAndPredict(training, testing, ALIASED_LABEL_COL, perceptron);

        // Raw types on purpose: the Scala closure and ClassTag erase the element types.
        @SuppressWarnings("rawtypes")
        Dataset predictionAndLabel = predictions.select(PREDICTION_COL, ALIASED_LABEL_COL);
        @SuppressWarnings("unchecked")
        BinaryClassificationMetrics curveMetrics = new BinaryClassificationMetrics(
                predictionAndLabel.rdd().map(new Classifiers$$anonfun$2(this), ClassTag$.MODULE$.apply(Tuple2.class)));

        // Every metric is evaluated exactly once; the value feeds both the console line and
        // the returned summary (previously each evaluation ran twice).
        double areaUnderRoc = curveMetrics.areaUnderROC();
        Predef$.MODULE$.println("Area under ROC = " + areaUnderRoc);
        double areaUnderPr = curveMetrics.areaUnderPR();
        Predef$.MODULE$.println("Area under PR = " + areaUnderPr);

        // Default label/prediction columns are used here, matching the selected columns.
        MulticlassClassificationEvaluator multiclass = new MulticlassClassificationEvaluator();
        double accuracy = multiclass.setMetricName("accuracy").evaluate(predictionAndLabel);
        Predef$.MODULE$.println("Accuracy = " + accuracy);
        double precision = multiclass.setMetricName("weightedPrecision").evaluate(predictionAndLabel);
        Predef$.MODULE$.println("weightedPrecision = " + precision);
        double recall = multiclass.setMetricName("weightedRecall").evaluate(predictionAndLabel);
        Predef$.MODULE$.println("weightedRecall = " + recall);
        return formatSummary(areaUnderRoc, areaUnderPr, accuracy, precision, recall);
    }

    /** Creates the SQL context and exposes the two inputs as the temp views DB1 and DB2. */
    private static SQLContext registerInputViews(Dataset<Row> training, Dataset<Row> testing, SparkContext sparkContext) {
        SQLContext sqlContext = new SQLContext(sparkContext);
        // createOrReplaceTempView is the non-deprecated equivalent of registerTempTable.
        training.createOrReplaceTempView("DB1");
        testing.createOrReplaceTempView("DB2");
        return sqlContext;
    }

    /**
     * Fits the pipeline (label indexer, feature indexer, classifier, label decoder) on the
     * training data and returns the transformed testing data.
     */
    private static Dataset<Row> trainAndPredict(Dataset<Row> training, Dataset<Row> testing, String labelColumn, PipelineStage classifier) {
        StringIndexerModel labelIndexer = new StringIndexer()
                .setInputCol(labelColumn)
                .setOutputCol(INDEXED_LABEL_COL)
                .fit(training);
        PipelineStage featureIndexer = new VectorIndexer()
                .setInputCol("features")
                .setOutputCol(INDEXED_FEATURES_COL)
                .setMaxCategories(4)
                .fit(training);
        PipelineStage labelDecoder = new IndexToString()
                .setInputCol(PREDICTION_COL)
                .setOutputCol("predictedLabel")
                .setLabels(labelIndexer.labels());
        Pipeline pipeline = new Pipeline()
                .setStages(new PipelineStage[]{labelIndexer, featureIndexer, classifier, labelDecoder});
        return pipeline.fit(training).transform(testing);
    }

    /**
     * Computes areaUnderROC, areaUnderPR, accuracy, weightedPrecision and weightedRecall (in
     * that order) with the ML evaluators against indexedLabel, and formats the summary.
     */
    private static String reportEvaluatorMetrics(Dataset<Row> predictions) {
        BinaryClassificationEvaluator binary = new BinaryClassificationEvaluator()
                .setLabelCol(INDEXED_LABEL_COL)
                .setRawPredictionCol("rawPrediction");
        double areaUnderRoc = reportMetric("areaUnderROC", predictions, binary);
        double areaUnderPr = reportMetric("areaUnderPR", predictions, binary);

        MulticlassClassificationEvaluator multiclass = new MulticlassClassificationEvaluator()
                .setLabelCol(INDEXED_LABEL_COL)
                .setPredictionCol(PREDICTION_COL);
        double accuracy = reportMetric("accuracy", predictions, multiclass);
        double precision = reportMetric("weightedPrecision", predictions, multiclass);
        double recall = reportMetric("weightedRecall", predictions, multiclass);
        return formatSummary(areaUnderRoc, areaUnderPr, accuracy, precision, recall);
    }

    /** Evaluates one binary-classification metric, prints "name = value" and returns the value. */
    private static double reportMetric(String metricName, Dataset<Row> predictions, BinaryClassificationEvaluator evaluator) {
        double value = evaluator.setMetricName(metricName).evaluate(predictions);
        Predef$.MODULE$.println(metricName + " = " + value);
        return value;
    }

    /** Evaluates one multiclass metric, prints "name = value" and returns the value. */
    private static double reportMetric(String metricName, Dataset<Row> predictions, MulticlassClassificationEvaluator evaluator) {
        double value = evaluator.setMetricName(metricName).evaluate(predictions);
        Predef$.MODULE$.println(metricName + " = " + value);
        return value;
    }

    /** Builds the pipe-delimited summary string returned by every public method. */
    private static String formatSummary(double areaUnderRoc, double areaUnderPr, double accuracy, double precision, double recall) {
        return "ROC=" + areaUnderRoc
                + "|" + "PR=" + areaUnderPr
                + "|" + "accuracy=" + accuracy
                + "|" + "Precision=" + precision
                + "|" + "Recall=" + recall;
    }
}
