package net.sansa_stack.ml.spark.outliers.anomalydetection;

import net.sansa_stack.ml.common.outliers.anomalydetection.Utils$;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.jena.graph.Triple;
import org.apache.spark.HashPartitioner;
import org.apache.spark.RangePartitioner;
import org.apache.spark.RangePartitioner$;
import org.apache.spark.ml.feature.HashingTF;
import org.apache.spark.ml.feature.MinHashLSH;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.storage.StorageLevel$;
import scala.Function2;
import scala.MatchError;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.$colon;
import scala.collection.immutable.List;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.HashSet$;
import scala.collection.mutable.Set$;
import scala.collection.mutable.SetLike;
import scala.math.Ordering;
import scala.math.Ordering$Double$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;

/* compiled from: AnomalyWithHashingTF.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005\u0005f\u0001\u0002\r\u001a\u0001\u0019B\u0001\u0002\r\u0001\u0003\u0002\u0003\u0006I!\r\u0005\t\t\u0002\u0011\t\u0011)A\u0005\u000b\"A\u0011\f\u0001B\u0001B\u0003%Q\t\u0003\u0005[\u0001\t\u0005\t\u0015!\u0003\\\u0011!q\u0006A!A!\u0002\u0013)\u0005\u0002C0\u0001\u0005\u0003\u0005\u000b\u0011\u00021\t\u0011\u0019\u0004!\u0011!Q\u0001\nEC\u0001b\u001a\u0001\u0003\u0002\u0003\u0006I\u0001\u001b\u0005\u0006W\u0002!\t\u0001\u001c\u0005\u0006o\u0002!\t\u0001\u001f\u0005\b\u0003#\u0001A\u0011AA\n\u0011\u001d\t)\u0002\u0001C\u0001\u0003'Aq!a\u0006\u0001\t\u0003\tI\u0002C\u0004\u0002 \u0001!\t!!\t\t\u000f\u0005=\u0002\u0001\"\u0001\u00022!9\u00111\n\u0001\u0005\u0002\u00055\u0003bBA4\u0001\u0011\u0005\u0011\u0011\u000e\u0005\b\u0003_\u0002A\u0011AA9\u000f\u001d\t\t)\u0007E\u0001\u0003\u00073a\u0001G\r\t\u0002\u0005\u0015\u0005BB6\u0015\t\u0003\t9\tC\u0004\u0002\nR!\t!a#\t\u0013\u0005uE#!A\u0005\n\u0005}%\u0001F!o_6\fG._,ji\"D\u0015m\u001d5j]\u001e$fI\u0003\u0002\u001b7\u0005\u0001\u0012M\\8nC2LH-\u001a;fGRLwN\u001c\u0006\u00039u\t\u0001b\\;uY&,'o\u001d\u0006\u0003=}\tQa\u001d9be.T!\u0001I\u0011\u0002\u00055d'B\u0001\u0012$\u0003-\u0019\u0018M\\:b?N$\u0018mY6\u000b\u0003\u0011\n1A\\3u\u0007\u0001\u00192\u0001A\u0014.!\tA3&D\u0001*\u0015\u0005Q\u0013!B:dC2\f\u0017B\u0001\u0017*\u0005\u0019\te.\u001f*fMB\u0011\u0001FL\u0005\u0003_%\u0012AbU3sS\u0006d\u0017N_1cY\u0016\f1B\u001c+sSBdWm\u001d*E\tB\u0019!G\u000f\u001f\u000e\u0003MR!\u0001N\u001b\u0002\u0007I$GM\u0003\u0002\u001fm)\u0011q\u0007O\u0001\u0007CB\f7\r[3\u000b\u0003e\n1a\u001c:h\u0013\tY4GA\u0002S\t\u0012\u0003\"!\u0010\"\u000e\u0003yR!a\u0010!\u0002\u000b\u001d\u0014\u0018\r\u001d5\u000b\u0005\u00053\u0014\u0001\u00026f]\u0006L!a\u0011 
\u0003\rQ\u0013\u0018\u000e\u001d7f\u0003\u001dy'M\u001b'jgR\u00042A\u0012(R\u001d\t9EJ\u0004\u0002I\u00176\t\u0011J\u0003\u0002KK\u00051AH]8pizJ\u0011AK\u0005\u0003\u001b&\nq\u0001]1dW\u0006<W-\u0003\u0002P!\n!A*[:u\u0015\ti\u0015\u0006\u0005\u0002S-:\u00111\u000b\u0016\t\u0003\u0011&J!!V\u0015\u0002\rA\u0013X\rZ3g\u0013\t9\u0006L\u0001\u0004TiJLgn\u001a\u0006\u0003+&\n1\u0002\u001e:ja2,7\u000fV=qK\u0006i!jU5n)\"\u0014Xm\u001d5pY\u0012\u0004\"\u0001\u000b/\n\u0005uK#A\u0002#pk\ndW-A\u0007mSN$8+\u001e9feRK\b/Z\u0001\rgB\f'o[*fgNLwN\u001c\t\u0003C\u0012l\u0011A\u0019\u0006\u0003GV\n1a]9m\u0013\t)'M\u0001\u0007Ta\u0006\u00148nU3tg&|g.\u0001\u0005isB,'O\\=n\u00031qW/\u001c)beRLG/[8o!\tA\u0013.\u0003\u0002kS\t\u0019\u0011J\u001c;\u0002\rqJg.\u001b;?)%iw\u000e]9sgR,h\u000f\u0005\u0002o\u00015\t\u0011\u0004C\u00031\u0013\u0001\u0007\u0011\u0007C\u0003E\u0013\u0001\u0007Q\tC\u0003Z\u0013\u0001\u0007Q\tC\u0003[\u0013\u0001\u00071\fC\u0003_\u0013\u0001\u0007Q\tC\u0003`\u0013\u0001\u0007\u0001\rC\u0003g\u0013\u0001\u0007\u0011\u000bC\u0003h\u0013\u0001\u0007\u0001.A\u0002sk:$\u0012!\u001f\t\u0004eiR\bc\u0001*|{&\u0011A\u0010\u0017\u0002\u0004'\u0016$\bC\u0002\u0015\u007f#F\u000b\t!\u0003\u0002��S\t1A+\u001e9mKN\u0002B!a\u0001\u0002\u000e5\u0011\u0011Q\u0001\u0006\u0005\u0003\u000f\tI!\u0001\u0003mC:<'BAA\u0006\u0003\u0011Q\u0017M^1\n\t\u0005=\u0011Q\u0001\u0002\u0007\u001f\nTWm\u0019;\u0002\r\u001d,G\u000fS=q)\u0005\t\u0014!D4fi>\u0013'.Z2u\u0019&\u001cH/A\u000bue&\u0004H.Z:XSRDg*^7fe&\u001cG*\u001b;\u0015\u0007E\nY\u0002\u0003\u0004\u0002\u001e5\u0001\r!M\u0001\u0007_\nTG*\u001b;\u0002\u001fA\u0014x\u000e]<ji\"\u001cXO\u00196fGR$B!a\t\u0002,A!!GOA\u0013!\u0015A\u0013qE)R\u0013\r\tI#\u000b\u0002\u0007)V\u0004H.\u001a\u001a\t\r\u00055b\u00021\u00012\u0003\u0005\t\u0017a\u0002:eMRK\b/\u001a\u000b\u0005\u0003g\t9\u0005\u0005\u00033u\u0005U\u0002C\u0002\u0015\u0002(E\u000b9\u0004E\u0003\u0002:\u0005\r\u0013+\u0004\u0002\u0002<)!\u0011QHA 
\u0003\u001diW\u000f^1cY\u0016T1!!\u0011*\u0003)\u0019w\u000e\u001c7fGRLwN\\\u0005\u0005\u0003\u000b\nYDA\u0004ICND7+\u001a;\t\r\u0005%s\u00021\u00012\u0003-9W\r\u001e%za\u0016\u0014h._7\u0002\u0017)\u001c\u0016.\\5mCJLG/\u001f\u000b\ns\u0006=\u00131KA,\u00037Ba!!\u0015\u0011\u0001\u0004\t\u0014!\u0007+sSBdWm],ji\"tU/\\3sS\u000ed\u0015\u000e^3sC2Dq!!\u0016\u0011\u0001\u0004\t\u0019#A\u0002yg\u0016Dq!!\u0017\u0011\u0001\u0004\t\u0019$A\u0007sI\u001a$\u0016\u0010]3E\u0005^L7.\u001b\u0005\b\u0003;\u0002\u0002\u0019AA0\u0003Ei\u0017\r]*vE^KG\u000f\u001b+sSBdWm\u001d\t\u0005ei\n\t\u0007\u0005\u0004)\u0003O\t\u00161\r\t\u0006\u0003s\t)'`\u0005\u0004y\u0006m\u0012A\u00049s_B\u001cE.^:uKJLgn\u001a\u000b\u0005\u0003?\nY\u0007\u0003\u0004\u0002nE\u0001\r!M\u0001\u001aiJL\u0007\u000f\\3t/&$\bNT;nKJL7\rT5uKJ\fG.\u0001\u0003jcJ\u0014DCBA:\u0003s\ni\b\u0005\u0003G\u0003kj\u0018bAA<!\n\u00191+Z9\t\u000f\u0005m$\u00031\u0001\u0002t\u000591\r\\;ti\u0016\u0014\bBBA@%\u0001\u0007\u0001.\u0001\tb]>l\u0017\r\\=MSN$H*[7ji\u0006!\u0012I\\8nC2Lx+\u001b;i\u0011\u0006\u001c\b.\u001b8h)\u001a\u0003\"A\u001c\u000b\u0014\u0007Q9S\u0006\u0006\u0002\u0002\u0004\u0006)\u0011\r\u001d9msR\tR.!$\u0002\u0010\u0006E\u00151SAK\u0003/\u000bI*a'\t\u000bA2\u0002\u0019A\u0019\t\u000b\u00113\u0002\u0019A#\t\u000be3\u0002\u0019A#\t\u000bi3\u0002\u0019A.\t\u000by3\u0002\u0019A#\t\u000b}3\u0002\u0019\u00011\t\u000b\u00194\u0002\u0019A)\t\u000b\u001d4\u0002\u0019\u00015\u0002\u0017I,\u0017\r\u001a*fg>dg/\u001a\u000b\u0003\u0003\u0003\u0001")
/* loaded from: input_file:net/sansa_stack/ml/spark/outliers/anomalydetection/AnomalyWithHashingTF.class */
public class AnomalyWithHashingTF implements Serializable {
    private final RDD<Triple> nTriplesRDD;
    private final List<String> objList;
    private final List<String> triplesType;
    private final SparkSession sparkSession;
    private final String hypernym;

    /**
     * Static forwarder to the Scala companion object's {@code apply}.
     *
     * <p>All eight arguments are passed through unchanged and in the same order; whatever the
     * companion returns is returned as-is. (Presumably the companion constructs a new instance --
     * confirm against {@code AnomalyWithHashingTF$}.)
     *
     * @return the instance produced by the companion object's {@code apply}
     */
    public static AnomalyWithHashingTF apply(RDD<Triple> rdd, List<String> list, List<String> list2, double d, List<String> list3, SparkSession sparkSession, String str, int i) {
        final AnomalyWithHashingTF$ companion = AnomalyWithHashingTF$.MODULE$;
        return companion.apply(rdd, list, list2, d, list3, sparkSession, str, i);
    }

    /**
     * Entry point of the pipeline: selects the candidate triples, groups them per subject, and
     * hands everything to {@link #jSimilarity}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>take the literal-object triples ({@link #getObjectList()}), keep those accepted by the
     *       {@code objList} search and drop the wiki page-id predicates;</li>
     *   <li>keep only triples with a numeric literal ({@link #triplesWithNumericLit});</li>
     *   <li>group those triples per subject ({@link #propClustering});</li>
     *   <li>join the groups with the per-subject label sets from {@link #rdfType} (built from the
     *       hypernym triples) and keep only (subject, label set);</li>
     *   <li>delegate to {@link #jSimilarity}.</li>
     * </ol>
     *
     * @return the RDD produced by {@link #jSimilarity}
     */
    public RDD<Set<Tuple3<String, String, Object>>> run() {
        RDD<Triple> searchedLiterals = getObjectList().filter(t -> BoxesRunTime.boxToBoolean($anonfun$run$1(this, t)));
        RDD<Triple> withoutPageIds = searchedLiterals.filter(t -> BoxesRunTime.boxToBoolean($anonfun$run$2(t)));
        RDD<Triple> numericTriples = triplesWithNumericLit(withoutPageIds);

        RDD<Tuple2<String, scala.collection.mutable.Set<Tuple3<String, String, Object>>>> clusters = propClustering(numericTriples);
        RDD<Tuple2<String, String>> subjectProperties = propwithsubject(numericTriples);

        // Join keeps only subjects present on both sides; the cluster half of the joined value is dropped.
        RDD<Tuple2<String, HashSet<String>>> labelSets = RDD$.MODULE$
                .rddToPairRDDFunctions(clusters, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(scala.collection.mutable.Set.class), Ordering$String$.MODULE$)
                .join(rdfType(getHyp()))
                .map(joined -> new Tuple2(joined._1(), ((Tuple2) joined._2())._2()), ClassTag$.MODULE$.apply(Tuple2.class));

        return jSimilarity(numericTriples, subjectProperties, labelSets, clusters);
    }

    /**
     * Selects the triples whose predicate string equals the configured {@code hypernym} string.
     *
     * @return the filtered view of the triples supplied at construction time
     */
    public RDD<Triple> getHyp() {
        RDD<Triple> allTriples = this.nTriplesRDD;
        return allTriples.filter(candidate -> BoxesRunTime.boxToBoolean($anonfun$getHyp$1(this, candidate)));
    }

    /**
     * Selects the triples whose object node is a literal.
     *
     * @return the filtered view of the triples supplied at construction time
     */
    public RDD<Triple> getObjectList() {
        RDD<Triple> allTriples = this.nTriplesRDD;
        return allTriples.filter(candidate -> BoxesRunTime.boxToBoolean($anonfun$getObjectList$1(candidate)));
    }

    /**
     * Keeps only the triples whose object string is accepted by {@code Utils.isNumeric}.
     *
     * @param rdd triples to filter
     * @return the triples of {@code rdd} that pass the numeric check
     */
    public RDD<Triple> triplesWithNumericLit(RDD<Triple> rdd) {
        return rdd.filter(candidate -> BoxesRunTime.boxToBoolean($anonfun$triplesWithNumericLit$1(candidate)));
    }

    /**
     * Maps every triple to a (subject local name, predicate local name) pair.
     *
     * @param rdd triples to convert
     * @return one pair per input triple, built with {@code Utils.getLocalName}
     */
    public RDD<Tuple2<String, String>> propwithsubject(RDD<Triple> rdd) {
        return rdd.map(
                t -> new Tuple2(
                        Utils$.MODULE$.getLocalName(t.getSubject()),
                        Utils$.MODULE$.getLocalName(t.getPredicate())),
                ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Builds, for each subject local name, one merged set of labels drawn from two sources:
     * the {@code rdf:type} triples of the full input and the triples passed in as {@code rdd}.
     *
     * <p>Labels coming from {@code rdd} are the object's local name with the literal suffix
     * {@code "hypernym"} appended, so they stay distinguishable from plain type labels.
     *
     * @param rdd triples whose objects are turned into suffixed labels ({@code run()} passes the
     *            result of {@code getHyp()} here)
     * @return pairs of (subject local name, merged label set)
     */
    public RDD<Tuple2<String, HashSet<String>>> rdfType(RDD<Triple> rdd) {
        // Source 1: rdf:type triples whose object is accepted by Utils.searchType against triplesType,
        // reduced to (subject local name, object local name).
        RDD map = this.nTriplesRDD.filter(triple -> {
            return BoxesRunTime.boxToBoolean($anonfun$rdfType$1(triple));
        }).filter(triple2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$rdfType$2(this, triple2));
        }).map(triple3 -> {
            return new Tuple2(Utils$.MODULE$.getLocalName(triple3.getSubject()), Utils$.MODULE$.getLocalName(triple3.getObject()));
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Collect all type labels of a subject into one mutable HashSet.
        RDD aggregateByKey = RDD$.MODULE$.rddToPairRDDFunctions(map, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(String.class), Ordering$String$.MODULE$).aggregateByKey(HashSet$.MODULE$.empty(), (hashSet, str) -> {
            return hashSet.$plus$eq(str);
        }, (hashSet2, hashSet3) -> {
            return hashSet2.$plus$plus$eq(hashSet3);
        }, ClassTag$.MODULE$.apply(HashSet.class));
        // Source 2: the caller-supplied triples, reduced to (subject local name, object local name + "hypernym").
        RDD map2 = rdd.map(triple4 -> {
            return new Tuple2(Utils$.MODULE$.getLocalName(triple4.getSubject()), new StringBuilder(8).append(Utils$.MODULE$.getLocalName(triple4.getObject())).append("hypernym").toString());
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Collect the suffixed labels per subject, then append the per-subject type sets (union keeps both
        // entries for a subject that appears in both sources).
        RDD union = RDD$.MODULE$.rddToPairRDDFunctions(map2, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(String.class), Ordering$String$.MODULE$).aggregateByKey(HashSet$.MODULE$.empty(), (hashSet4, str2) -> {
            return hashSet4.$plus$eq(str2);
        }, (hashSet5, hashSet6) -> {
            return hashSet5.$plus$plus$eq(hashSet6);
        }, ClassTag$.MODULE$.apply(HashSet.class)).union(aggregateByKey);
        // Gather the (up to two) sets of each subject into a set-of-sets, then flatten it into a single set.
        return RDD$.MODULE$.rddToPairRDDFunctions(union, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(HashSet.class), Ordering$String$.MODULE$).aggregateByKey(HashSet$.MODULE$.empty(), (hashSet7, hashSet8) -> {
            return hashSet7.$plus$eq(hashSet8);
        }, (hashSet9, hashSet10) -> {
            return hashSet9.$plus$plus$eq(hashSet10);
        }, ClassTag$.MODULE$.apply(HashSet.class)).map(tuple2 -> {
            return new Tuple2(tuple2._1(), ((TraversableLike) tuple2._2()).flatMap(hashSet11 -> {
                return hashSet11;
            }, HashSet$.MODULE$.canBuildFrom()));
        }, ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Groups subjects whose label sets are similar (HashingTF features + MinHashLSH self-join) and,
     * for every such group, emits one set of (subject, property, value) tuples per property.
     *
     * <p>NOTE(review): {@code rdd} and {@code rdd2} are never read in this body; only {@code rdd3}
     * and {@code rdd4} are used. The partition counts (30, 400, 500), the feature count (1048576),
     * the number of hash tables (3) and the join threshold (0.45) are hard-coded here rather than
     * taken from constructor arguments -- confirm whether that is intended.
     *
     * @param rdd  unused in this method
     * @param rdd2 unused in this method
     * @param rdd3 pairs of (subject, label set); the label set becomes the HashingTF input
     * @param rdd4 pairs of (subject, set of (subject, property, value) tuples)
     * @return one immutable set per (similar-subject group, property name), containing the tuples
     *         of that group whose second component equals the property name
     */
    public RDD<Set<Tuple3<String, String, Object>>> jSimilarity(RDD<Triple> rdd, RDD<Tuple2<String, String>> rdd2, RDD<Tuple2<String, HashSet<String>>> rdd3, RDD<Tuple2<String, scala.collection.mutable.Set<Tuple3<String, String, Object>>>> rdd4) {
        // Releases the input triples from Spark's cache (default blocking argument).
        this.nTriplesRDD.unpersist(this.nTriplesRDD.unpersist$default$1());
        // HashingTF needs a sequence column, so each label set is converted to a Seq.
        RDD map = rdd3.map(tuple2 -> {
            return new Tuple2(tuple2._1(), ((SetLike) tuple2._2()).toSeq());
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Range-partition by subject into 30 partitions and cache. NOTE(review): this RDD is never
        // unpersisted in this method.
        RDD persist = RDD$.MODULE$.rddToPairRDDFunctions(map, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Seq.class), Ordering$String$.MODULE$).partitionBy(new RangePartitioner(30, map, RangePartitioner$.MODULE$.$lessinit$greater$default$3(), RangePartitioner$.MODULE$.$lessinit$greater$default$4(), Ordering$String$.MODULE$, ClassTag$.MODULE$.apply(String.class))).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        // Decompiler artifact: stand-in for the outer reference handed to the synthetic TypeCreator below.
        final AnomalyWithHashingTF anomalyWithHashingTF = null;
        // Convert to a DataFrame with columns ("id", "values"), drop duplicate rows, and hash "values"
        // into a "features" vector. The anonymous TypeCreator is the reified TypeTag for
        // Tuple2[String, Seq[String]] that the product encoder requires.
        Dataset transform = new HashingTF().setInputCol("values").setOutputCol("features").setNumFeatures(1048576).transform(this.sparkSession.implicits().rddToDatasetHolder(persist, this.sparkSession.implicits().newProductEncoder(package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(AnomalyWithHashingTF.class.getClassLoader()), new TypeCreator(anomalyWithHashingTF) { // from class: net.sansa_stack.ml.spark.outliers.anomalydetection.AnomalyWithHashingTF$$typecreator5$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Tuple2"), new $colon.colon(universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$), new $colon.colon(universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection").asModule().moduleClass()), mirror.staticClass("scala.collection.Seq"), new $colon.colon(universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$), Nil$.MODULE$)), Nil$.MODULE$)));
            }
        }))).toDF(Predef$.MODULE$.wrapRefArray(new String[]{"id", "values"})).dropDuplicates());
        // MinHashLSH (3 hash tables) self-join with threshold argument 0.45; rows with a null id or with
        // identical ids on both sides are discarded. The surviving (id1, id2) pairs are repartitioned to
        // 400 partitions, cached, and folded into (id, set of matching ids); the id itself is then added
        // to its own set and the set is made immutable.
        RDD map2 = RDD$.MODULE$.rddToPairRDDFunctions(new MinHashLSH().setNumHashTables(3).setInputCol("features").setOutputCol("hashes").fit(transform).approxSimilarityJoin(transform, transform, 0.45d).filter(this.sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"datasetA.id"}))).$(Nil$.MODULE$).isNotNull()).filter(this.sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"datasetB.id"}))).$(Nil$.MODULE$).isNotNull()).filter(this.sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"datasetA.id"}))).$(Nil$.MODULE$).$eq$bang$eq(this.sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"datasetB.id"}))).$(Nil$.MODULE$))).select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("datasetA.id").alias("id1"), functions$.MODULE$.col("datasetB.id").alias("id2")})).repartition(400).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK()).rdd().map(row -> {
            return new Tuple2(row.getString(0), row.getString(1));
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(String.class), Ordering$String$.MODULE$).aggregateByKey(Set$.MODULE$.empty(), (set, str) -> {
            return set.$plus$eq(str);
        }, (set2, set3) -> {
            return set2.$plus$plus$eq(set3);
        }, ClassTag$.MODULE$.apply(scala.collection.mutable.Set.class)).map(tuple22 -> {
            return new Tuple2(tuple22._1(), ((SetLike) tuple22._2()).$plus$eq(tuple22._1()).toSet());
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Both sides of the upcoming join share one 500-way HashPartitioner and are cached.
        HashPartitioner hashPartitioner = new HashPartitioner(500);
        RDD persist2 = RDD$.MODULE$.rddToPairRDDFunctions(rdd4, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(scala.collection.mutable.Set.class), Ordering$String$.MODULE$).partitionBy(hashPartitioner).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        RDD persist3 = RDD$.MODULE$.rddToPairRDDFunctions(map2, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Set.class), Ordering$String$.MODULE$).partitionBy(hashPartitioner).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        // Join on subject id and drop the id: each record becomes (similar-id set, that subject's tuples).
        // The inner `tuple23` redeclaration is a decompiler artifact of the Scala pattern match.
        RDD map3 = RDD$.MODULE$.rddToPairRDDFunctions(persist3, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Set.class), Ordering$String$.MODULE$).join(persist2).map(tuple23 -> {
            Tuple2 tuple23;
            if (tuple23 == null || (tuple23 = (Tuple2) tuple23._2()) == null) {
                throw new MatchError(tuple23);
            }
            return new Tuple2(((Set) tuple23._1()).toSet(), (scala.collection.mutable.Set) tuple23._2());
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Aggregation pieces: gather every tuple set that shares the same similar-id set.
        HashSet empty = HashSet$.MODULE$.empty();
        Function2 function2 = (hashSet, set4) -> {
            return hashSet.$plus$eq(set4);
        };
        Function2 function22 = (hashSet2, hashSet3) -> {
            return hashSet2.$plus$plus$eq(hashSet3);
        };
        ClassTag apply = ClassTag$.MODULE$.apply(Set.class);
        ClassTag apply2 = ClassTag$.MODULE$.apply(scala.collection.mutable.Set.class);
        // Evaluates the default ordering argument; the result is discarded and a null Ordering is passed
        // below because the key type (a Set) has no ordering.
        RDD$.MODULE$.rddToPairRDDFunctions$default$4(map3);
        RDD<Set<Tuple3<String, String, Object>>> map4 = RDD$.MODULE$.rddToPairRDDFunctions(map3, apply, apply2, (Ordering) null).aggregateByKey(empty, function2, function22, ClassTag$.MODULE$.apply(HashSet.class)).map(tuple24 -> {
            if (tuple24 == null) {
                throw new MatchError(tuple24);
            }
            // Replace the key with the set of property names (second tuple component) seen in the group.
            HashSet hashSet4 = (HashSet) tuple24._2();
            return new Tuple2(((TraversableOnce) hashSet4.flatMap(set5 -> {
                return (scala.collection.mutable.Set) set5.map(tuple3 -> {
                    return (String) tuple3._2();
                }, Set$.MODULE$.canBuildFrom());
            }, HashSet$.MODULE$.canBuildFrom())).toSet(), hashSet4);
        }, ClassTag$.MODULE$.apply(Tuple2.class)).flatMap(tuple25 -> {
            if (tuple25 == null) {
                throw new MatchError(tuple25);
            }
            // Emit one (property name, all tuples of the group flattened) record per property name.
            Set set5 = (Set) tuple25._1();
            HashSet hashSet4 = (HashSet) tuple25._2();
            return (Set) set5.map(str2 -> {
                return new Tuple2(str2, ((TraversableOnce) hashSet4.flatMap(set6 -> {
                    return set6;
                }, HashSet$.MODULE$.canBuildFrom())).toSet());
            }, scala.collection.immutable.Set$.MODULE$.canBuildFrom());
        }, ClassTag$.MODULE$.apply(Tuple2.class)).map(tuple26 -> {
            if (tuple26 == null) {
                throw new MatchError(tuple26);
            }
            // Keep only the tuples that belong to this record's property name.
            String str2 = (String) tuple26._1();
            return (Set) ((Set) tuple26._2()).filter(tuple3 -> {
                return BoxesRunTime.boxToBoolean($anonfun$jSimilarity$16(str2, tuple3));
            });
        }, ClassTag$.MODULE$.apply(Set.class));
        // NOTE(review): no action on map4 is visible in this method before these unpersist calls, so
        // the two caches may be released before the returned RDD is ever computed -- confirm.
        persist2.unpersist(persist2.unpersist$default$1());
        persist3.unpersist(persist3.unpersist$default$1());
        return map4;
    }

    /**
     * Groups triples by subject: every triple becomes a (subject, property, value) tuple, and all
     * tuples that share a subject local name are collected into one mutable set.
     *
     * <p>The value component is whatever {@code Utils.getNumber} returns for the object's string form.
     *
     * @param rdd triples to group ({@code run()} passes the numeric-literal triples)
     * @return pairs of (subject local name, set of that subject's tuples)
     */
    public RDD<Tuple2<String, scala.collection.mutable.Set<Tuple3<String, String, Object>>>> propClustering(RDD<Triple> rdd) {
        // Key each tuple by its subject so aggregateByKey can gather them.
        RDD keyedBySubject = rdd.map(
                t -> new Tuple2(
                        Utils$.MODULE$.getLocalName(t.getSubject()),
                        new Tuple3(
                                Utils$.MODULE$.getLocalName(t.getSubject()),
                                Utils$.MODULE$.getLocalName(t.getPredicate()),
                                Utils$.MODULE$.getNumber(t.getObject().toString()))),
                ClassTag$.MODULE$.apply(Tuple2.class));
        return RDD$.MODULE$
                .rddToPairRDDFunctions(keyedBySubject, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple3.class), Ordering$String$.MODULE$)
                .aggregateByKey(
                        Set$.MODULE$.empty(),
                        (acc, fact) -> acc.$plus$eq(fact),
                        (left, right) -> left.$plus$plus$eq(right),
                        ClassTag$.MODULE$.apply(scala.collection.mutable.Set.class));
    }

    /**
     * Interquartile-range outlier filter over the third component of each tuple.
     *
     * <p>The third components are parsed as doubles, the 25th and 75th percentiles are computed with
     * commons-math {@code DescriptiveStatistics}, and values below {@code Q1 - 1.5*IQR} or above
     * {@code Q3 + 1.5*IQR} are treated as outlying. The result keeps the tuples of {@code seq} for
     * which {@code Utils.search(value, outlyingValues)} returns true (presumably a membership test --
     * confirm against {@code Utils}).
     *
     * @param seq tuples whose third component's string form must be parseable as a double
     * @param i   not read anywhere in this method
     * @return the tuples of {@code seq} selected by the outlier check, in their original order
     */
    public Seq<Tuple3<String, String, Object>> iqr2(Seq<Tuple3<String, String, Object>> seq, int i) {
        // Extract the numeric values and sort them ascending.
        double[] dArr = (double[]) new ArrayOps.ofDouble(Predef$.MODULE$.doubleArrayOps((double[]) ((TraversableOnce) seq.map(tuple3 -> {
            return BoxesRunTime.boxToDouble($anonfun$iqr2$1(tuple3));
        }, Seq$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.Double()))).sorted(Ordering$Double$.MODULE$);
        DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics();
        Predef$.MODULE$.genericArrayOps(dArr).foreach(d -> {
            descriptiveStatistics.addValue(d);
        });
        // percentile = Q1, percentile2 = Q3, d2 = IQR, d3/d4 = lower/upper fence.
        double percentile = descriptiveStatistics.getPercentile(25.0d);
        double percentile2 = descriptiveStatistics.getPercentile(75.0d);
        double d2 = percentile2 - percentile;
        double d3 = percentile - (1.5d * d2);
        double d4 = percentile2 + (1.5d * d2);
        // Values strictly outside the fences.
        double[] dArr2 = (double[]) new ArrayOps.ofDouble(Predef$.MODULE$.doubleArrayOps(dArr)).filter(d5 -> {
            return d5 < d3 || d5 > d4;
        });
        // Map the outlying values back to the tuples that carry them.
        return (Seq) seq.filter(tuple32 -> {
            return BoxesRunTime.boxToBoolean($anonfun$iqr2$4(dArr2, tuple32));
        });
    }

    /**
     * Lambda body for the first filter in {@code run()}: passes the triple's object string and the
     * instance's {@code objList} to {@code Utils.searchedge} and returns its verdict.
     */
    public static final /* synthetic */ boolean $anonfun$run$1(AnomalyWithHashingTF anomalyWithHashingTF, Triple triple) {
        String objectText = triple.getObject().toString();
        return Utils$.MODULE$.searchedge(objectText, anomalyWithHashingTF.objList);
    }

    /**
     * Lambda body for the second filter in {@code run()}: rejects triples whose predicate string
     * contains {@code "wikiPageID"} or {@code "wikiPageRevisionID"}; accepts everything else.
     */
    public static final /* synthetic */ boolean $anonfun$run$2(Triple triple) {
        String predicateText = triple.getPredicate().toString();
        if (predicateText.contains("wikiPageID")) {
            return false;
        }
        return !predicateText.contains("wikiPageRevisionID");
    }

    /**
     * Lambda body for {@code getHyp()}: true when the triple's predicate string equals the
     * instance's {@code hypernym} field.
     */
    public static final /* synthetic */ boolean $anonfun$getHyp$1(AnomalyWithHashingTF anomalyWithHashingTF, Triple triple) {
        String predicateText = triple.getPredicate().toString();
        return predicateText.equals(anomalyWithHashingTF.hypernym);
    }

    /**
     * Lambda body for {@code getObjectList()}: true when the triple's object node is a literal.
     */
    public static final /* synthetic */ boolean $anonfun$getObjectList$1(Triple triple) {
        return triple.getObject().isLiteral();
    }

    /**
     * Lambda body for {@code triplesWithNumericLit}: returns what {@code Utils.isNumeric} reports
     * for the string form of the triple's object.
     */
    public static final /* synthetic */ boolean $anonfun$triplesWithNumericLit$1(Triple triple) {
        String objectText = triple.getObject().toString();
        return Utils$.MODULE$.isNumeric(objectText);
    }

    /**
     * Lambda body for the first filter in {@code rdfType}: true when the triple's predicate string
     * is exactly the {@code rdf:type} IRI.
     *
     * <p>The decompiled form ended in {@code "..." == 0}, a String-to-int comparison that is not
     * valid Java. It stood for the null branch of Scala's {@code ==}, which yields false because a
     * non-null literal never equals null. Constant-first {@code equals} returns the same result for
     * both null and non-null predicate strings.
     *
     * @param triple the triple to test
     * @return true if and only if the predicate string equals the {@code rdf:type} IRI
     */
    public static final /* synthetic */ boolean $anonfun$rdfType$1(Triple triple) {
        return "http://www.w3.org/1999/02/22-rdf-syntax-ns#type".equals(triple.getPredicate().toString());
    }

    /**
     * Lambda body for the second filter in {@code rdfType}: passes the triple's object string and
     * the instance's {@code triplesType} list to {@code Utils.searchType} and returns its verdict.
     */
    public static final /* synthetic */ boolean $anonfun$rdfType$2(AnomalyWithHashingTF anomalyWithHashingTF, Triple triple) {
        String objectText = triple.getObject().toString();
        return Utils$.MODULE$.searchType(objectText, anomalyWithHashingTF.triplesType);
    }

    /**
     * Lambda body for the final filter in {@code jSimilarity}: true when the tuple's second
     * component (cast to String) equals {@code str}.
     */
    public static final /* synthetic */ boolean $anonfun$jSimilarity$16(String str, Tuple3 tuple3) {
        String property = (String) tuple3._2();
        return property.equals(str);
    }

    /**
     * Lambda body for the value extraction in {@code iqr2}: parses the string form of the tuple's
     * third component as a double via Scala's {@code StringOps.toDouble}.
     */
    public static final /* synthetic */ double $anonfun$iqr2$1(Tuple3 tuple3) {
        String valueText = tuple3._3().toString();
        StringOps parser = new StringOps(Predef$.MODULE$.augmentString(valueText));
        return parser.toDouble();
    }

    /**
     * Lambda body for the final filter in {@code iqr2}: parses the tuple's third component as a
     * double and returns what {@code Utils.search} reports for that value against {@code dArr}.
     */
    public static final /* synthetic */ boolean $anonfun$iqr2$4(double[] dArr, Tuple3 tuple3) {
        String valueText = tuple3._3().toString();
        double value = new StringOps(Predef$.MODULE$.augmentString(valueText)).toDouble();
        return Utils$.MODULE$.search(value, dArr);
    }

    /**
     * Stores the inputs the pipeline reads later.
     *
     * <p>NOTE(review): {@code d}, {@code list3} and {@code i} are accepted but not stored or read
     * by this constructor.
     *
     * @param rdd          triples to analyse; kept as {@code nTriplesRDD}
     * @param list         kept as {@code objList} (used by the object search in {@code run()})
     * @param list2        kept as {@code triplesType} (used by the type filter in {@code rdfType})
     * @param d            unused here
     * @param list3        unused here
     * @param sparkSession session used to build Datasets in {@code jSimilarity}
     * @param str          kept as {@code hypernym} (predicate string matched by {@code getHyp()})
     * @param i            unused here
     */
    public AnomalyWithHashingTF(RDD<Triple> rdd, List<String> list, List<String> list2, double d, List<String> list3, SparkSession sparkSession, String str, int i) {
        // Runtime context and data source.
        this.sparkSession = sparkSession;
        this.nTriplesRDD = rdd;
        // Filtering configuration.
        this.hypernym = str;
        this.objList = list;
        this.triplesType = list2;
    }
}
