package net.sansa_stack.ml.spark.clustering.utils;

import com.typesafe.config.Config;
import java.io.File;
import java.io.FilenameFilter;
import net.sansa_stack.ml.spark.clustering.datatypes.Categories;
import net.sansa_stack.ml.spark.clustering.datatypes.CoordinatePOI;
import net.sansa_stack.ml.spark.clustering.datatypes.POI;
import net.sansa_stack.rdf.spark.io.NTripleReader$;
import org.apache.jena.graph.Triple;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.SparkSession;
import scala.Option$;
import scala.Predef$;
import scala.Serializable;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Iterable$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.Set$;
import scala.math.Ordering$Long$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;
import scala.runtime.RichInt$;
import scala.util.matching.Regex;

/* compiled from: DataProcessing.scala */
@ScalaSignature(bytes = "\u0006\u0001\t\u001da\u0001\u0002\u0014(\u0001QB\u0001\u0002\f\u0001\u0003\u0006\u0004%\tA\u0010\u0005\t\u0015\u0002\u0011\t\u0011)A\u0005\u007f!A1\n\u0001BC\u0002\u0013\u0005A\n\u0003\u0005X\u0001\t\u0005\t\u0015!\u0003N\u0011!A\u0006A!A!\u0002\u0013I\u0006\"B4\u0001\t\u0003A\u0007b\u00028\u0001\u0001\u0004%\ta\u001c\u0005\b{\u0002\u0001\r\u0011\"\u0001\u007f\u0011\u001d\tI\u0001\u0001Q!\nAD\u0011\"a\u0003\u0001\u0001\u0004%\t!!\u0004\t\u0013\u0005M\u0001\u00011A\u0005\u0002\u0005U\u0001\u0002CA\r\u0001\u0001\u0006K!a\u0004\t\u0013\u0005m\u0001\u00011A\u0005\u0002\u0005u\u0001\"CA\u001d\u0001\u0001\u0007I\u0011AA\u001e\u0011!\ty\u0004\u0001Q!\n\u0005}\u0001\"CA!\u0001\u0001\u0007I\u0011AA\"\u0011%\ty\u0005\u0001a\u0001\n\u0003\t\t\u0006\u0003\u0005\u0002V\u0001\u0001\u000b\u0015BA#\u0011%\t9\u0006\u0001a\u0001\n\u0003\t\u0019\u0005C\u0005\u0002Z\u0001\u0001\r\u0011\"\u0001\u0002\\!A\u0011q\f\u0001!B\u0013\t)\u0005C\u0005\u0002b\u0001\u0011\r\u0011\"\u0001\u0002d!A\u0011\u0011\u000f\u0001!\u0002\u0013\t)\u0007C\u0005\u0002t\u0001\u0001\r\u0011\"\u0001\u0002v!I\u0011q\u0010\u0001A\u0002\u0013\u0005\u0011\u0011\u0011\u0005\t\u0003\u000b\u0003\u0001\u0015)\u0003\u0002x!9\u0011q\u0011\u0001\u0005\u0002\u0005%\u0005bBAK\u0001\u0011\u0005\u0011q\u0013\u0005\u0007\u0003W\u0003A\u0011A8\t\u000f\u0005-\u0006\u0001\"\u0001\u0002.\"9\u0011q\u0017\u0001\u0005\u0002\u00055\u0001bBA]\u0001\u0011\u0005\u00111\u0018\u0005\b\u0003\u0007\u0004A\u0011AA\"\u0011\u001d\t)\r\u0001C\u0001\u0003\u000fDq!!4\u0001\t\u0003\ty\rC\u0004\u0002V\u0002!\t!a6\t\u000f\u0005=\b\u0001\"\u0001\u0002r\nqA)\u0019;b!J|7-Z:tS:<'B\u0001\u0015*\u0003\u0015)H/\u001b7t\u0015\tQ3&\u0001\u0006dYV\u001cH/\u001a:j]\u001eT!\u0001L\u0017\u0002\u000bM\u0004\u0018M]6\u000b\u00059z\u0013AA7m\u0015\t\u0001\u0014'A\u0006tC:\u001c\u0018mX:uC\u000e\\'\"\u0001\u001a\u0002\u00079,Go\u0001\u0001\u0014\u0007\u0001)4\b\u0005\u00027s5\tqGC\u00019\u0003\u0015\u00198-\u00197b\u0013\tQtG\u0001\u0004B]f\u001
4VM\u001a\t\u0003mqJ!!P\u001c\u0003\u0019M+'/[1mSj\f'\r\\3\u0016\u0003}\u0002\"\u0001\u0011%\u000e\u0003\u0005S!AQ\"\u0002\u0007M\fHN\u0003\u0002-\t*\u0011QIR\u0001\u0007CB\f7\r[3\u000b\u0003\u001d\u000b1a\u001c:h\u0013\tI\u0015I\u0001\u0007Ta\u0006\u00148nU3tg&|g.\u0001\u0004ta\u0006\u00148\u000eI\u0001\u0005G>tg-F\u0001N!\tqU+D\u0001P\u0015\t\u0001\u0016+\u0001\u0004d_:4\u0017n\u001a\u0006\u0003%N\u000b\u0001\u0002^=qKN\fg-\u001a\u0006\u0002)\u0006\u00191m\\7\n\u0005Y{%AB\"p]\u001aLw-A\u0003d_:4\u0007%A\u0004eCR\f'\u000b\u0012#\u0011\u0007ikv,D\u0001\\\u0015\ta6)A\u0002sI\u0012L!AX.\u0003\u0007I#E\t\u0005\u0002aK6\t\u0011M\u0003\u0002cG\u0006)qM]1qQ*\u0011A\rR\u0001\u0005U\u0016t\u0017-\u0003\u0002gC\n1AK]5qY\u0016\fa\u0001P5oSRtD\u0003B5lY6\u0004\"A\u001b\u0001\u000e\u0003\u001dBQ\u0001\f\u0004A\u0002}BQa\u0013\u0004A\u00025CQ\u0001\u0017\u0004A\u0002e\u000ba\u0002]8j\u0007>|'\u000fZ5oCR,7/F\u0001q!\rQV,\u001d\t\u0005mI$x/\u0003\u0002to\t1A+\u001e9mKJ\u0002\"AN;\n\u0005Y<$\u0001\u0002'p]\u001e\u0004\"\u0001_>\u000e\u0003eT!A_\u0015\u0002\u0013\u0011\fG/\u0019;za\u0016\u001c\u0018B\u0001?z\u00055\u0019un\u001c:eS:\fG/\u001a)P\u0013\u0006\u0011\u0002o\\5D_>\u0014H-\u001b8bi\u0016\u001cx\fJ3r)\ry\u0018Q\u0001\t\u0004m\u0005\u0005\u0011bAA\u0002o\t!QK\\5u\u0011!\t9\u0001CA\u0001\u0002\u0004\u0001\u0018a\u0001=%c\u0005y\u0001o\\5D_>\u0014H-\u001b8bi\u0016\u001c\b%A\tq_&4E.\u0019;DCR,wm\u001c:z\u0013\u0012,\"!a\u0004\u0011\tik\u0016\u0011\u0003\t\u0005mI$H/A\u000bq_&4E.\u0019;DCR,wm\u001c:z\u0013\u0012|F%Z9\u0015\u0007}\f9\u0002C\u0005\u0002\b-\t\t\u00111\u0001\u0002\u0010\u0005\u0011\u0002o\\5GY\u0006$8)\u0019;fO>\u0014\u00180\u00133!\u00035\u0001x.[\"bi\u0016<wN]=JIV\u0011\u0011q\u0004\t\u00055v\u000b\t\u0003E\u00037eR\f\u0019\u0003E\u0003\u0002&\u0005MBO\u0004\u0003\u0002(\u0005=\u0002cAA\u0015o5\u0011\u00111\u0006\u0006\u0004\u0003[\u0019\u0014A\u0002\u001fs_>$h(C\u0002\u00022]\na\u0001\u0015:fI\u00164\u0017\u0002BA\u001b\u0003o\u00111aU3u\u0015\r\t\tdN\u0001\u0012
a>L7)\u0019;fO>\u0014\u00180\u00133`I\u0015\fHcA@\u0002>!I\u0011q\u0001\b\u0002\u0002\u0003\u0007\u0011qD\u0001\u000fa>L7)\u0019;fO>\u0014\u00180\u00133!\u0003M\u0001x.[\"bi\u0016<wN]=WC2,XmU3u+\t\t)\u0005\u0005\u0003[;\u0006\u001d\u0003#\u0002\u001csi\u0006%\u0003c\u0001=\u0002L%\u0019\u0011QJ=\u0003\u0015\r\u000bG/Z4pe&,7/A\fq_&\u001c\u0015\r^3h_JLh+\u00197vKN+Go\u0018\u0013fcR\u0019q0a\u0015\t\u0013\u0005\u001d\u0011#!AA\u0002\u0005\u0015\u0013\u0001\u00069pS\u000e\u000bG/Z4pef4\u0016\r\\;f'\u0016$\b%A\u0007q_&\u001c\u0015\r^3h_JLWm]\u0001\u0012a>L7)\u0019;fO>\u0014\u0018.Z:`I\u0015\fHcA@\u0002^!I\u0011q\u0001\u000b\u0002\u0002\u0003\u0007\u0011QI\u0001\u000fa>L7)\u0019;fO>\u0014\u0018.Z:!\u0003E\u0001x.[-fYB\u001c\u0015\r^3h_JLWm]\u000b\u0003\u0003K\u0002BAW/\u0002hA)aG\u001d;\u0002jA1aG]A%\u0003W\u00022ANA7\u0013\r\tyg\u000e\u0002\u0007\t>,(\r\\3\u0002%A|\u0017.W3ma\u000e\u000bG/Z4pe&,7\u000fI\u0001\u0005a>L7/\u0006\u0002\u0002xA!!,XA=!\rA\u00181P\u0005\u0004\u0003{J(a\u0001)P\u0013\u0006A\u0001o\\5t?\u0012*\u0017\u000fF\u0002��\u0003\u0007C\u0011\"a\u0002\u001a\u0003\u0003\u0005\r!a\u001e\u0002\u000bA|\u0017n\u001d\u0011\u0002\u00171|\u0017\r\u001a(Ue&\u0004H.\u001a\u000b\u00043\u0006-\u0005bBAG7\u0001\u0007\u0011qR\u0001\u000fiJL\u0007\u000f\\3GS2,\u0007+\u0019;i!\u0011\t)#!%\n\t\u0005M\u0015q\u0007\u0002\u0007'R\u0014\u0018N\\4\u0002#\u0019LG\u000e^3s\u0007>|'\u000fZ5oCR,7\u000fF\u0006q\u00033\u000bY*a(\u0002$\u0006\u001d\u0006\"\u00028\u001d\u0001\u0004\u0001\bbBAO9\u0001\u0007\u00111N\u0001\u0007Y>|V.\u001b8\t\u000f\u0005\u0005F\u00041\u0001\u0002l\u00051An\\0nCbDq!!*\u001d\u0001\u0004\tY'\u0001\u0004mC~k\u0017N\u001c\u0005\b\u0003Sc\u0002\u0019AA6\u0003\u0019a\u0017mX7bq\u0006\tr-\u001a;Q\u001f&\u001bun\u001c:eS:\fG/Z:\u0015\u0013A\fy+!-\u00024\u0006U\u0006bBAO=\u0001\u0007\u00111\u000e\u0005\b\u0003Cs\u0002\u0019AA6\u0011\u001d\t)K\ba\u0001\u0003WBq!!+\u001f\u0001\u0004\tY'\u0001\u000bhKR\u0004v*\u0013$mCR\u001c\u0015\r^3h_JL\u0018\nZ\u0001\u0011O\u0016$
\bkT%DCR,wm\u001c:jKN$\u0002\"!\u0012\u0002>\u0006}\u0016\u0011\u0019\u0005\u0006]\u0002\u0002\r\u0001\u001d\u0005\b\u0003\u0017\u0001\u0003\u0019AA\b\u0011\u001d\t\t\u0005\ta\u0001\u0003\u000b\n\u0011cZ3u\u0007\u0006$XmZ8ssZ\u000bG.^3t\u000359W\r^\"bi\u0016<wN]=JIR1\u0011qDAe\u0003\u0017DQA\u001c\u0012A\u0002ADq!a\u0003#\u0001\u0004\ty!A\thKRLV\r\u001c9DCR,wm\u001c:jKN$B!!\u001a\u0002R\"1\u00111[\u0012A\u0002e\u000b\u0011\"\\3sO\u0016$'\u000b\u0012#\u0002\u0017\u001d,Go\u0018;sSBdWm\u001d\u000b\u000b\u00033\fi.!9\u0002l\u00065\b\u0003\u0002.^\u00037\u0004RA\u000e:\u0002\u0010~Cq!a8%\u0001\u0004\ty)A\u0001b\u0011\u001d\t\u0019\u000f\na\u0001\u0003K\f\u0001\u0002]8j\u0003J\u0014\u0018-\u001f\t\u0005m\u0005\u001dH/C\u0002\u0002j^\u0012Q!\u0011:sCfDQ\u0001\u0017\u0013A\u0002eCQ\u0001\f\u0013A\u0002}\nab\u0019:fCR,7+\u001e2kK\u000e$8\u000f\u0006\u0003\u0002t\n\r\u0001CBA{\u0003\u007f\fy)\u0004\u0002\u0002x*!\u0011\u0011`A~\u0003\u001diW\u000f^1cY\u0016T1!!@8\u0003)\u0019w\u000e\u001c7fGRLwN\\\u0005\u0005\u0005\u0003\t9PA\u0006BeJ\f\u0017PQ;gM\u0016\u0014\bB\u0002B\u0003K\u0001\u0007A/A\u0003q_&LE\t")
/* loaded from: input_file:net/sansa_stack/ml/spark/clustering/utils/DataProcessing.class */
/**
 * Extracts point-of-interest (POI) data from an RDD of Jena triples.
 *
 * <p>Predicate names and URI prefixes are read from the supplied {@link Config}
 * (keys under {@code sansa.data.*} and {@code yelp.data.*}). The constructor eagerly
 * builds the derived RDDs exposed through the accessor methods: coordinates, category
 * ids, category values, Yelp categories/ratings and finally the combined {@link POI}s.
 */
public class DataProcessing implements Serializable {
    private final SparkSession spark;
    private final Config conf;
    private final RDD<Triple> dataRDD;
    // The derived RDDs below are assigned in the constructor, AFTER spark/conf/dataRDD.
    // They must not be field initialisers: Java runs field initialisers before the
    // constructor body, so the getters would see a null dataRDD/conf.
    private RDD<Tuple2<Object, CoordinatePOI>> poiCoordinates;
    private RDD<Tuple2<Object, Object>> poiFlatCategoryId;
    private RDD<Tuple2<Object, Set<Object>>> poiCategoryId;
    private RDD<Tuple2<Object, Categories>> poiCategoryValueSet;
    private RDD<Tuple2<Object, Categories>> poiCategories;
    private final RDD<Tuple2<Object, Tuple2<Categories, Object>>> poiYelpCategories;
    private RDD<POI> pois;

    /** @return the Spark session passed to the constructor */
    public SparkSession spark() {
        return this.spark;
    }

    /** @return the configuration passed to the constructor */
    public Config conf() {
        return this.conf;
    }

    /** @return pairs of (POI id, coordinate) */
    public RDD<Tuple2<Object, CoordinatePOI>> poiCoordinates() {
        return this.poiCoordinates;
    }

    /** Scala-style setter for {@link #poiCoordinates()}. */
    public void poiCoordinates_$eq(RDD<Tuple2<Object, CoordinatePOI>> rdd) {
        this.poiCoordinates = rdd;
    }

    /** @return pairs of (POI id, category id), one pair per category triple */
    public RDD<Tuple2<Object, Object>> poiFlatCategoryId() {
        return this.poiFlatCategoryId;
    }

    /** Scala-style setter for {@link #poiFlatCategoryId()}. */
    public void poiFlatCategoryId_$eq(RDD<Tuple2<Object, Object>> rdd) {
        this.poiFlatCategoryId = rdd;
    }

    /** @return pairs of (POI id, set of category ids); persisted by the constructor */
    public RDD<Tuple2<Object, Set<Object>>> poiCategoryId() {
        return this.poiCategoryId;
    }

    /** Scala-style setter for {@link #poiCategoryId()}. */
    public void poiCategoryId_$eq(RDD<Tuple2<Object, Set<Object>>> rdd) {
        this.poiCategoryId = rdd;
    }

    /** @return pairs of (category id, category values) */
    public RDD<Tuple2<Object, Categories>> poiCategoryValueSet() {
        return this.poiCategoryValueSet;
    }

    /** Scala-style setter for {@link #poiCategoryValueSet()}. */
    public void poiCategoryValueSet_$eq(RDD<Tuple2<Object, Categories>> rdd) {
        this.poiCategoryValueSet = rdd;
    }

    /** @return pairs of (POI id, merged category values of that POI) */
    public RDD<Tuple2<Object, Categories>> poiCategories() {
        return this.poiCategories;
    }

    /** Scala-style setter for {@link #poiCategories()}. */
    public void poiCategories_$eq(RDD<Tuple2<Object, Categories>> rdd) {
        this.poiCategories = rdd;
    }

    /** @return pairs of (POI id, (Yelp categories, rating)), as sampled by the constructor */
    public RDD<Tuple2<Object, Tuple2<Categories, Object>>> poiYelpCategories() {
        return this.poiYelpCategories;
    }

    /** @return the combined POIs built by the constructor */
    public RDD<POI> pois() {
        return this.pois;
    }

    /** Scala-style setter for {@link #pois()}. */
    public void pois_$eq(RDD<POI> rdd) {
        this.pois = rdd;
    }

    /**
     * Loads N-Triples from a file, or from every part file of a directory.
     *
     * <p>For a directory, entries whose name contains {@code SUCCESS} or ends with
     * {@code .crc} (case-insensitive) are skipped and the remaining files are unioned.
     *
     * @param str path of an N-Triples file or of a directory holding such files
     * @return the triples of the file, or the union over all accepted directory entries
     * @throws IllegalArgumentException if {@code str} is a directory that cannot be
     *         listed or that contains no accepted file
     */
    public RDD<Triple> loadNTriple(String str) {
        File file = new File(str);
        if (!file.isDirectory()) {
            return NTripleReader$.MODULE$.load(spark(), file.getAbsolutePath(), NTripleReader$.MODULE$.load$default$3(), NTripleReader$.MODULE$.load$default$4(), NTripleReader$.MODULE$.load$default$5(), NTripleReader$.MODULE$.load$default$6());
        }
        File[] listFiles = file.listFiles(new FilenameFilter() { // from class: net.sansa_stack.ml.spark.clustering.utils.DataProcessing$$anon$1
            @Override // java.io.FilenameFilter
            public boolean accept(File file2, String str2) {
                return !str2.contains("SUCCESS") && !str2.toLowerCase().endsWith(".crc");
            }
        });
        // listFiles() yields null on an I/O error and an empty array when nothing matches;
        // fail with a descriptive message instead of an NPE / ArrayIndexOutOfBoundsException.
        if (listFiles == null || listFiles.length == 0) {
            throw new IllegalArgumentException("No N-Triples files found in directory: " + file.getAbsolutePath());
        }
        // Seed the result with the first file; the callback skips index 0 and unions the rest.
        IntRef create = IntRef.create(0);
        ObjectRef create2 = ObjectRef.create(NTripleReader$.MODULE$.load(spark(), listFiles[0].getAbsolutePath(), NTripleReader$.MODULE$.load$default$3(), NTripleReader$.MODULE$.load$default$4(), NTripleReader$.MODULE$.load$default$5(), NTripleReader$.MODULE$.load$default$6()));
        new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(listFiles)).foreach(file2 -> {
            $anonfun$loadNTriple$1(this, create, create2, file2);
            return BoxedUnit.UNIT;
        });
        return (RDD) create2.elem;
    }

    /**
     * Keeps only the POIs that lie inside a bounding box (bounds are inclusive).
     *
     * @param rdd pairs of (POI id, coordinate)
     * @param d   minimum longitude
     * @param d2  maximum longitude
     * @param d3  minimum latitude
     * @param d4  maximum latitude
     * @return the entries of {@code rdd} whose coordinate is inside the box
     */
    public RDD<Tuple2<Object, CoordinatePOI>> filterCoordinates(RDD<Tuple2<Object, CoordinatePOI>> rdd, double d, double d2, double d3, double d4) {
        return rdd.filter(tuple2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$filterCoordinates$1(d, d2, d3, d4, tuple2));
        });
    }

    /**
     * Extracts the coordinates of every POI from the input triples.
     *
     * <p>Selects triples whose predicate equals (ignoring case) the configured
     * {@code sansa.data.coordinatesPredicate}. The POI id is the subject with the
     * {@code sansa.data.poiPrefix} and the {@code /geometry} suffix removed, parsed as a
     * long. The object is expected to contain a {@code POINT(a b)} literal; {@code a} and
     * {@code b} are parsed as doubles and passed, in that order, to {@link CoordinatePOI}.
     *
     * @return pairs of (POI id, coordinate)
     */
    public RDD<Tuple2<Object, CoordinatePOI>> getPOICoordinates() {
        Regex r = new StringOps(Predef$.MODULE$.augmentString("POINT(.+ .+)")).r();
        return RDD$.MODULE$.rddToPairRDDFunctions(this.dataRDD.filter(triple -> {
            return BoxesRunTime.boxToBoolean($anonfun$getPOICoordinates$1(this, triple));
        }).map(triple2 -> {
            return new Tuple2(BoxesRunTime.boxToLong(new StringOps(Predef$.MODULE$.augmentString(triple2.getSubject().toString().replace(this.conf().getString("sansa.data.poiPrefix"), "").replace("/geometry", ""))).toLong()), ((String) Option$.MODULE$.option2Iterable(r.findFirstIn(triple2.getObject().toString())).head()).replace("POINT", "").replace("^^http://www.opengis.net/ont/geosparql#wktLiteral", "").replaceAll("^\"|\"$", ""));
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(String.class), Ordering$Long$.MODULE$).mapValues(str -> {
            String[] split = str.replace("(", "").replace(")", "").split(" ");
            return new CoordinatePOI(new StringOps(Predef$.MODULE$.augmentString(split[0])).toDouble(), new StringOps(Predef$.MODULE$.augmentString(split[1])).toDouble());
        });
    }

    /**
     * Extracts the POI coordinates and restricts them to a bounding box.
     *
     * @param d  minimum longitude
     * @param d2 maximum longitude
     * @param d3 minimum latitude
     * @param d4 maximum latitude
     * @return pairs of (POI id, coordinate) inside the box, bounds inclusive
     */
    public RDD<Tuple2<Object, CoordinatePOI>> getPOICoordinates(double d, double d2, double d3, double d4) {
        return filterCoordinates(getPOICoordinates(), d, d2, d3, d4);
    }

    /**
     * Extracts (POI id, category id) pairs from the input triples.
     *
     * <p>Selects triples whose predicate equals (ignoring case) the configured
     * {@code sansa.data.categoryPOI}; the ids are the subject and object with
     * {@code sansa.data.poiPrefix} and {@code sansa.data.termPrefix} removed, parsed as longs.
     *
     * @return one pair per matching triple
     */
    public RDD<Tuple2<Object, Object>> getPOIFlatCategoryId() {
        return this.dataRDD.filter(triple -> {
            return BoxesRunTime.boxToBoolean($anonfun$getPOIFlatCategoryId$1(this, triple));
        }).map(triple2 -> {
            return new Tuple2.mcJJ.sp(new StringOps(Predef$.MODULE$.augmentString(triple2.getSubject().toString().replace(this.conf().getString("sansa.data.poiPrefix"), ""))).toLong(), new StringOps(Predef$.MODULE$.augmentString(triple2.getObject().toString().replace(this.conf().getString("sansa.data.termPrefix"), ""))).toLong());
        }, ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Resolves the category values of every POI that has coordinates.
     *
     * <p>{@code rdd2} is re-keyed by category id and joined with {@code rdd3}; the result
     * is grouped per POI, joined with {@code rdd}, and the category sets of each POI are
     * flattened into a single {@link Categories}.
     *
     * @param rdd  pairs of (POI id, coordinate)
     * @param rdd2 pairs of (POI id, category id)
     * @param rdd3 pairs of (category id, category values)
     * @return pairs of (POI id, merged category values)
     */
    public RDD<Tuple2<Object, Categories>> getPOICategories(RDD<Tuple2<Object, CoordinatePOI>> rdd, RDD<Tuple2<Object, Object>> rdd2, RDD<Tuple2<Object, Categories>> rdd3) {
        return RDD$.MODULE$.rddToPairRDDFunctions(rdd, ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(CoordinatePOI.class), Ordering$Long$.MODULE$).join(RDD$.MODULE$.rddToPairRDDFunctions(RDD$.MODULE$.rddToPairRDDFunctions(rdd2.map(tuple2 -> {
            return new Tuple2.mcJJ.sp(tuple2._2$mcJ$sp(), tuple2._1$mcJ$sp());
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.Long(), Ordering$Long$.MODULE$).join(rdd3).map(tuple22 -> {
            return new Tuple2(BoxesRunTime.boxToLong(((Tuple2) tuple22._2())._1$mcJ$sp()), ((Tuple2) tuple22._2())._2());
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(Categories.class), Ordering$Long$.MODULE$).groupByKey()).map(tuple23 -> {
            return new Tuple2(BoxesRunTime.boxToLong(tuple23._1$mcJ$sp()), new Categories(Set$.MODULE$.apply(((TraversableOnce) ((TraversableLike) ((Tuple2) tuple23._2())._2()).flatMap(categories -> {
                return categories.categories();
            }, Iterable$.MODULE$.canBuildFrom())).toList())));
        }, ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Extracts the values of every category term from the input triples.
     *
     * <p>Selects triples whose predicate equals (ignoring case) the configured
     * {@code sansa.data.termValueUri}. The category id is the subject with
     * {@code sansa.data.termPrefix} removed, parsed as a long; the value is the object
     * string with all double quotes removed. Values are grouped per category id.
     *
     * @return pairs of (category id, category values)
     */
    public RDD<Tuple2<Object, Categories>> getCategoryValues() {
        return RDD$.MODULE$.rddToPairRDDFunctions(this.dataRDD.filter(triple -> {
            return BoxesRunTime.boxToBoolean($anonfun$getCategoryValues$1(this, triple));
        }).map(triple2 -> {
            return new Tuple2(BoxesRunTime.boxToLong(new StringOps(Predef$.MODULE$.augmentString(triple2.getSubject().toString().replace(this.conf().getString("sansa.data.termPrefix"), ""))).toLong()), triple2.getObject().toString().replaceAll("\"", ""));
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(String.class), Ordering$Long$.MODULE$).groupByKey().map(tuple2 -> {
            return new Tuple2(BoxesRunTime.boxToLong(tuple2._1$mcJ$sp()), new Categories(Set$.MODULE$.apply(((TraversableOnce) tuple2._2()).toList())));
        }, ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Collects the set of category ids of every POI that has coordinates.
     *
     * @param rdd  pairs of (POI id, coordinate)
     * @param rdd2 pairs of (POI id, category id)
     * @return pairs of (POI id, set of category ids) for ids present in both inputs
     */
    public RDD<Tuple2<Object, Set<Object>>> getCategoryId(RDD<Tuple2<Object, CoordinatePOI>> rdd, RDD<Tuple2<Object, Object>> rdd2) {
        return RDD$.MODULE$.rddToPairRDDFunctions(rdd, ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(CoordinatePOI.class), Ordering$Long$.MODULE$).join(RDD$.MODULE$.rddToPairRDDFunctions(rdd2, ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.Long(), Ordering$Long$.MODULE$).groupByKey()).map(tuple2 -> {
            return new Tuple2(BoxesRunTime.boxToLong(tuple2._1$mcJ$sp()), ((TraversableOnce) ((Tuple2) tuple2._2())._2()).toSet());
        }, ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Extracts Yelp categories and ratings per POI.
     *
     * <p>Category triples are those whose predicate equals (ignoring case) the configured
     * {@code yelp.data.categoryPOI}; rating triples are those whose predicate contains
     * {@code yelp.data.rating}. The POI id is the subject with {@code sansa.data.poiPrefix}
     * removed, parsed as a long. Categories are grouped per POI and joined with the ratings.
     *
     * <p>NOTE(review): the method prints the rating key and both triple counts to standard
     * output; each {@code count()} runs a Spark job. This looks like leftover debugging
     * output - confirm before removing.
     *
     * @param rdd the triples to scan
     * @return pairs of (POI id, (categories, rating))
     */
    public RDD<Tuple2<Object, Tuple2<Categories, Object>>> getYelpCategories(RDD<Triple> rdd) {
        RDD filter = rdd.filter(triple -> {
            return BoxesRunTime.boxToBoolean($anonfun$getYelpCategories$1(this, triple));
        });
        Predef$.MODULE$.println(conf().getString("yelp.data.rating"));
        RDD filter2 = rdd.filter(triple2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$getYelpCategories$2(this, triple2));
        });
        Predef$.MODULE$.println("category");
        Predef$.MODULE$.println(BoxesRunTime.boxToLong(filter.count()));
        Predef$.MODULE$.println("rating");
        Predef$.MODULE$.println(BoxesRunTime.boxToLong(filter2.count()));
        RDD map = filter.map(triple3 -> {
            return new Tuple2(BoxesRunTime.boxToLong(new StringOps(Predef$.MODULE$.augmentString(triple3.getSubject().toString().replace(this.conf().getString("sansa.data.poiPrefix"), ""))).toLong()), triple3.getObject().toString());
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        return RDD$.MODULE$.rddToPairRDDFunctions(RDD$.MODULE$.rddToPairRDDFunctions(map, ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(String.class), Ordering$Long$.MODULE$).groupByKey(), ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(Iterable.class), Ordering$Long$.MODULE$).join(filter2.map(triple4 -> {
            return new Tuple2.mcJD.sp(new StringOps(Predef$.MODULE$.augmentString(triple4.getSubject().toString().replace(this.conf().getString("sansa.data.poiPrefix"), ""))).toLong(), new StringOps(Predef$.MODULE$.augmentString(triple4.getObject().getLiteralValue().toString())).toDouble());
        }, ClassTag$.MODULE$.apply(Tuple2.class))).map(tuple2 -> {
            return new Tuple2(BoxesRunTime.boxToLong(tuple2._1$mcJ$sp()), new Tuple2(new Categories(Set$.MODULE$.apply(((TraversableOnce) ((Tuple2) tuple2._2())._1()).toList())), BoxesRunTime.boxToDouble(((Tuple2) tuple2._2())._2$mcD$sp())));
        }, ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Collects all triples that describe the given POIs and tags them with a label.
     *
     * <p>First, every triple whose subject URI is one of the URIs produced by
     * {@link #createSubjects(long)} for the given ids is selected. Second, for the selected
     * triples whose predicate is {@code http://slipo.eu/def#category}, the triples whose
     * subject is that category object are added, de-duplicated on the concatenation of
     * subject, predicate and object. Both parts are returned as one union.
     *
     * @param str          label used as the key of every returned pair
     * @param jArr         POI ids; every element is used
     * @param rdd          the triples to search
     * @param sparkSession session used to parallelize the subject URIs
     * @return pairs of ({@code str}, triple)
     */
    public RDD<Tuple2<String, Triple>> get_triples(String str, long[] jArr, RDD<Triple> rdd, SparkSession sparkSession) {
        ArrayBuffer apply = ArrayBuffer$.MODULE$.apply(Nil$.MODULE$);
        // "until" has an exclusive upper bound, so the bound must be the array length itself;
        // using length - 1 silently dropped the last POI id.
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), jArr.length).foreach(obj -> {
            return $anonfun$get_triples$1(this, apply, jArr, BoxesRunTime.unboxToInt(obj));
        });
        RDD persist = rdd.map(triple -> {
            return new Tuple2(triple.getSubject().getURI(), triple);
        }, ClassTag$.MODULE$.apply(Tuple2.class)).persist();
        RDD persist2 = RDD$.MODULE$.rddToPairRDDFunctions(sparkSession.sparkContext().parallelize(apply.toSet().toList(), sparkSession.sparkContext().parallelize$default$2(), ClassTag$.MODULE$.apply(String.class)).map(str2 -> {
            return new Tuple2(str2, str2);
        }, ClassTag$.MODULE$.apply(Tuple2.class)).persist(), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(String.class), Ordering$String$.MODULE$).join(persist).map(tuple2 -> {
            return new Tuple2(str, ((Tuple2) tuple2._2())._2());
        }, ClassTag$.MODULE$.apply(Tuple2.class)).persist();
        return persist2.union(RDD$.MODULE$.rddToPairRDDFunctions(RDD$.MODULE$.rddToPairRDDFunctions(persist2.filter(tuple22 -> {
            return BoxesRunTime.boxToBoolean($anonfun$get_triples$5(tuple22));
        }).map(tuple23 -> {
            return ((Triple) tuple23._2()).getObject().getURI();
        }, ClassTag$.MODULE$.apply(String.class)).distinct().persist().map(str3 -> {
            return new Tuple2(str3, str3);
        }, ClassTag$.MODULE$.apply(Tuple2.class)).persist(), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(String.class), Ordering$String$.MODULE$).join(persist).map(tuple24 -> {
            return (Triple) ((Tuple2) tuple24._2())._2();
        }, ClassTag$.MODULE$.apply(Triple.class)).map(triple2 -> {
            return new Tuple2(new StringBuilder(0).append(triple2.getSubject().getURI()).append(triple2.getPredicate().getURI()).append(triple2.getObject().toString()).toString(), triple2);
        }, ClassTag$.MODULE$.apply(Tuple2.class)).persist(), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Triple.class), Ordering$String$.MODULE$).reduceByKey((triple3, triple4) -> {
            return triple3;
        }).map(tuple25 -> {
            return new Tuple2(str, tuple25._2());
        }, ClassTag$.MODULE$.apply(Tuple2.class)).persist());
    }

    /**
     * Builds the subject URIs under which the triples of one POI are looked up.
     *
     * <p>The base URI is the hard-coded {@code http://example.org/id/poi/} followed by the
     * id; the other entries append {@code /address}, {@code /phone}, {@code /geometry},
     * {@code /name}, {@code /accuracy_info} and {@code /brandname} to it.
     *
     * @param j the POI id
     * @return the seven subject URIs, base URI first
     */
    public ArrayBuffer<String> createSubjects(long j) {
        ArrayBuffer<String> apply = ArrayBuffer$.MODULE$.apply(Nil$.MODULE$);
        String concat = "http://example.org/id/poi/".concat(BoxesRunTime.boxToLong(j).toString());
        apply.$plus$eq(concat);
        apply.$plus$eq(concat.concat("/address"));
        apply.$plus$eq(concat.concat("/phone"));
        apply.$plus$eq(concat.concat("/geometry"));
        apply.$plus$eq(concat.concat("/name"));
        apply.$plus$eq(concat.concat("/accuracy_info"));
        apply.$plus$eq(concat.concat("/brandname"));
        return apply;
    }

    // Lambda body of loadNTriple: unions every file except the first (index 0) into the result.
    public static final /* synthetic */ void $anonfun$loadNTriple$1(DataProcessing dataProcessing, IntRef intRef, ObjectRef objectRef, File file) {
        if (intRef.elem != 0) {
            objectRef.elem = ((RDD) objectRef.elem).union(NTripleReader$.MODULE$.load(dataProcessing.spark(), file.getAbsolutePath(), NTripleReader$.MODULE$.load$default$3(), NTripleReader$.MODULE$.load$default$4(), NTripleReader$.MODULE$.load$default$5(), NTripleReader$.MODULE$.load$default$6()));
        }
        intRef.elem++;
    }

    // Lambda body of filterCoordinates: inclusive bounding-box test on longitude and latitude.
    public static final /* synthetic */ boolean $anonfun$filterCoordinates$1(double d, double d2, double d3, double d4, Tuple2 tuple2) {
        return ((CoordinatePOI) tuple2._2()).longitude() >= d && ((CoordinatePOI) tuple2._2()).longitude() <= d2 && ((CoordinatePOI) tuple2._2()).latitude() >= d3 && ((CoordinatePOI) tuple2._2()).latitude() <= d4;
    }

    // Predicate of getPOICoordinates: matches the configured coordinates predicate, ignoring case.
    public static final /* synthetic */ boolean $anonfun$getPOICoordinates$1(DataProcessing dataProcessing, Triple triple) {
        return triple.getPredicate().toString().equalsIgnoreCase(dataProcessing.conf().getString("sansa.data.coordinatesPredicate"));
    }

    // Predicate of getPOIFlatCategoryId: matches the configured POI-category predicate, ignoring case.
    public static final /* synthetic */ boolean $anonfun$getPOIFlatCategoryId$1(DataProcessing dataProcessing, Triple triple) {
        return triple.getPredicate().toString().equalsIgnoreCase(dataProcessing.conf().getString("sansa.data.categoryPOI"));
    }

    // Predicate of getCategoryValues: matches the configured term-value predicate, ignoring case.
    public static final /* synthetic */ boolean $anonfun$getCategoryValues$1(DataProcessing dataProcessing, Triple triple) {
        return triple.getPredicate().toString().equalsIgnoreCase(dataProcessing.conf().getString("sansa.data.termValueUri"));
    }

    // Predicate of getYelpCategories: matches the configured Yelp category predicate, ignoring case.
    public static final /* synthetic */ boolean $anonfun$getYelpCategories$1(DataProcessing dataProcessing, Triple triple) {
        return triple.getPredicate().toString().equalsIgnoreCase(dataProcessing.conf().getString("yelp.data.categoryPOI"));
    }

    // Predicate of getYelpCategories: predicate contains the configured Yelp rating key (case-sensitive).
    public static final /* synthetic */ boolean $anonfun$getYelpCategories$2(DataProcessing dataProcessing, Triple triple) {
        return triple.getPredicate().toString().contains(dataProcessing.conf().getString("yelp.data.rating"));
    }

    // Lambda body of get_triples: appends the subject URIs of the i-th POI id to the buffer.
    public static final /* synthetic */ ArrayBuffer $anonfun$get_triples$1(DataProcessing dataProcessing, ArrayBuffer arrayBuffer, long[] jArr, int i) {
        return arrayBuffer.$plus$plus$eq(dataProcessing.createSubjects(jArr[i]));
    }

    // Predicate of get_triples: keeps triples whose predicate is the SLIPO category property.
    public static final /* synthetic */ boolean $anonfun$get_triples$5(Tuple2 tuple2) {
        return ((Triple) tuple2._2()).getPredicate().getURI().equals("http://slipo.eu/def#category");
    }

    /**
     * Stores the inputs and eagerly derives all POI RDDs from {@code rdd}.
     *
     * <p>The Yelp categories are sampled without replacement at fraction 0.1 with seed 0.
     * If that sample is empty, each POI is built from its coordinate and categories with a
     * rating of 0.0; otherwise the Yelp categories are merged with the POI categories and
     * the Yelp rating is used.
     *
     * @param sparkSession the Spark session
     * @param config       configuration providing the {@code sansa.data.*} and
     *                     {@code yelp.data.*} keys
     * @param rdd          the input triples
     */
    public DataProcessing(SparkSession sparkSession, Config config, RDD<Triple> rdd) {
        RDD<POI> persist;
        this.spark = sparkSession;
        this.conf = config;
        this.dataRDD = rdd;
        // Derived RDDs: order matters, each step reads the fields assigned above it.
        this.poiCoordinates = getPOICoordinates();
        this.poiFlatCategoryId = getPOIFlatCategoryId();
        this.poiCategoryId = getCategoryId(poiCoordinates(), poiFlatCategoryId()).persist();
        this.poiCategoryValueSet = getCategoryValues();
        this.poiCategories = getPOICategories(poiCoordinates(), poiFlatCategoryId(), poiCategoryValueSet());
        this.poiYelpCategories = getYelpCategories(rdd).sample(false, 0.1d, 0L);
        if (poiYelpCategories().isEmpty()) {
            persist = RDD$.MODULE$.rddToPairRDDFunctions(poiCoordinates(), ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(CoordinatePOI.class), Ordering$Long$.MODULE$).join(poiCategories()).map(tuple2 -> {
                return new POI(tuple2._1$mcJ$sp(), (CoordinatePOI) ((Tuple2) tuple2._2())._1(), (Categories) ((Tuple2) tuple2._2())._2(), 0.0d);
            }, ClassTag$.MODULE$.apply(POI.class)).persist();
        } else {
            persist = RDD$.MODULE$.rddToPairRDDFunctions(poiCoordinates(), ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(CoordinatePOI.class), Ordering$Long$.MODULE$).join(RDD$.MODULE$.rddToPairRDDFunctions(poiYelpCategories(), ClassTag$.MODULE$.Long(), ClassTag$.MODULE$.apply(Tuple2.class), Ordering$Long$.MODULE$).join(poiCategories()).map(tuple22 -> {
                return new Tuple2(BoxesRunTime.boxToLong(tuple22._1$mcJ$sp()), new Tuple2(new Categories(((Categories) ((Tuple2) ((Tuple2) tuple22._2())._1())._1()).categories().$plus$plus(((Categories) ((Tuple2) tuple22._2())._2()).categories())), BoxesRunTime.boxToDouble(((Tuple2) ((Tuple2) tuple22._2())._1())._2$mcD$sp())));
            }, ClassTag$.MODULE$.apply(Tuple2.class))).map(tuple23 -> {
                return new POI(tuple23._1$mcJ$sp(), (CoordinatePOI) ((Tuple2) tuple23._2())._1(), (Categories) ((Tuple2) ((Tuple2) tuple23._2())._2())._1(), ((Tuple2) ((Tuple2) tuple23._2())._2())._2$mcD$sp());
            }, ClassTag$.MODULE$.apply(POI.class)).persist();
        }
        this.pois = persist;
    }
}
