package net.sansa_stack.spark.cli.cmd.impl;

import java.io.OutputStream;
import java.net.URI;
import java.util.concurrent.TimeUnit;
import net.sansa_stack.rdf.spark.model.rdd.RddOfDatasetOps$;
import net.sansa_stack.spark.cli.cmd.CmdSansaTrigMerge;
import org.aksw.commons.io.util.StdIo;
import org.aksw.jena_sparql_api.rx.RDFLanguagesEx;
import org.aksw.jena_sparql_api.utils.io.StreamRDFDeferred;
import org.aksw.jena_sparql_api.utils.io.WriterStreamRDFBaseUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.jena.query.Dataset;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.RDFFormat;
import org.apache.jena.riot.system.StreamRDFOps;
import org.apache.jena.riot.system.StreamRDFWriter;
import org.apache.jena.riot.system.SyntaxLabels;
import org.apache.jena.riot.writer.WriterStreamRDFBase;
import org.apache.jena.shared.impl.PrefixMappingImpl;
import org.apache.jena.sparql.util.Context;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.IterableLike;
import scala.collection.Iterator;
import scala.collection.JavaConverters$;
import scala.collection.SetLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.immutable.Set$;
import scala.collection.mutable.Buffer$;
import scala.package$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: CmdSansaTrigMergeImpl.scala */
/* loaded from: input_file:net/sansa_stack/spark/cli/cmd/impl/CmdSansaTrigMergeImpl$.class */
/**
 * Singleton implementation of the "trig merge" command (compiled form of the Scala object
 * {@code CmdSansaTrigMergeImpl}).
 *
 * <p>{@link #run(CmdSansaTrigMerge)} reads the requested TriG files as RDDs of datasets, groups
 * the named graphs by graph IRI and writes the result either to standard output or to the
 * configured output file/folder.
 */
public final class CmdSansaTrigMergeImpl$ {
    /** The single instance; assigned by the private constructor during static initialisation. */
    public static CmdSansaTrigMergeImpl$ MODULE$;
    private final Logger logger;

    static {
        // The constructor publishes the freshly created instance through MODULE$.
        new CmdSansaTrigMergeImpl$();
    }

    private Logger logger() {
        return this.logger;
    }

    /**
     * Executes the merge described by the given command object.
     *
     * @param cmdSansaTrigMerge the parsed command; this method reads its {@code outPrefixes},
     *     {@code trigFiles}, {@code sort}, {@code distinct}, {@code numPartitions},
     *     {@code outFormat}, {@code outFolder}, {@code outFile} and
     *     {@code deferOutputForUsedPrefixes} fields
     * @return {@code 0} once processing has completed
     * @throws IllegalArgumentException if at least one entry of {@code trigFiles} could not be
     *     resolved to an existing file
     */
    public Integer run(CmdSansaTrigMerge cmdSansaTrigMerge) {
        StopWatch createStarted = StopWatch.createStarted();

        // Collect the namespace prefixes of every model listed in outPrefixes.
        PrefixMappingImpl prefixMappingImpl = new PrefixMappingImpl();
        ((IterableLike) JavaConverters$.MODULE$.asScalaBufferConverter(cmdSansaTrigMerge.outPrefixes).asScala()).foreach(str -> {
            MODULE$.logger().info("Adding prefixes from {}", str);
            return prefixMappingImpl.setNsPrefixes(RDFDataMgr.loadModel(str));
        });

        SparkSession orCreate = SparkSession$.MODULE$.builder()
                .appName("Trig Merge ( " + cmdSansaTrigMerge.trigFiles + " )")
                .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .config("spark.kryoserializer.buffer.max", "1000")
                .config("spark.kryo.registrator", String.join(", ", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator", "net.sansa_stack.query.spark.ontop.OntopKryoRegistrator"))
                .config("spark.sql.crossJoin.enabled", true)
                .getOrCreate();
        Configuration hadoopConfiguration = orCreate.sparkContext().hadoopConfiguration();

        // Keep only those locations that resolve on their file system and denote a file.
        // Resolution failures are logged here and reported collectively further below.
        List list = ((TraversableOnce) ((TraversableLike) ((TraversableLike) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(cmdSansaTrigMerge.trigFiles).asScala()).flatMap(str2 -> {
            Iterator apply = package$.MODULE$.Iterator().apply(Nil$.MODULE$);
            try {
                FileSystem fileSystem = FileSystem.get(new URI(str2), hadoopConfiguration);
                Path path = new Path(str2);
                fileSystem.resolvePath(path);
                apply = package$.MODULE$.Iterator().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{new Tuple2(fileSystem, path)}));
            } catch (Throwable th) {
                MODULE$.logger().error(ExceptionUtils.getRootCauseMessage(th));
            }
            return apply;
        }, Buffer$.MODULE$.canBuildFrom())).filter(tuple2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$run$3(tuple2));
        })).map(tuple22 -> {
            return (Path) tuple22._2();
        }, Buffer$.MODULE$.canBuildFrom())).toList();
        Set set = list.toSet();

        // The previous check subtracted the set of valid paths from itself, so it could never
        // fail. Compare against the locations that were actually requested instead.
        java.util.Set<String> invalidLocations = collectInvalidLocations(cmdSansaTrigMerge.trigFiles, set);
        if (!invalidLocations.isEmpty()) {
            throw new IllegalArgumentException("The following paths are invalid (do not exist or are not a (readable) file): " + invalidLocations);
        }

        // One RDD of datasets per distinct input file, merged into a single RDD and regrouped.
        RDD groupNamedGraphsByGraphIri = RddOfDatasetOps$.MODULE$.groupNamedGraphsByGraphIri(orCreate.sparkContext().union(((SetLike) set.map(path -> {
            return (RDD) net.sansa_stack.rdf.spark.io.package$.MODULE$.RDFReader(orCreate).datasets(Lang.TRIG).apply(path.toString());
        }, Set$.MODULE$.canBuildFrom())).toSeq(), ClassTag$.MODULE$.apply(Dataset.class)), cmdSansaTrigMerge.sort, cmdSansaTrigMerge.distinct, cmdSansaTrigMerge.numPartitions);

        RDFFormat findRdfFormat = RDFLanguagesEx.findRdfFormat(cmdSansaTrigMerge.outFormat);
        if (cmdSansaTrigMerge.outFolder == null && cmdSansaTrigMerge.outFile == null) {
            // No output target configured: stream the merged datasets to standard output.
            OutputStream openStdOutWithCloseShield = StdIo.openStdOutWithCloseShield();
            WriterStreamRDFBase writerStream = StreamRDFWriter.getWriterStream(openStdOutWithCloseShield, findRdfFormat, (Context) null);
            if (writerStream instanceof WriterStreamRDFBase) {
                WriterStreamRDFBaseUtils.setNodeToLabel(writerStream, SyntaxLabels.createNodeToLabelAsGiven());
            }
            StreamRDFDeferred streamRDFDeferred = new StreamRDFDeferred(writerStream, true, prefixMappingImpl, cmdSansaTrigMerge.deferOutputForUsedPrefixes, Long.MAX_VALUE, (String) null);
            streamRDFDeferred.start();
            StreamRDFOps.sendPrefixesToStream(prefixMappingImpl, streamRDFDeferred);
            groupNamedGraphsByGraphIri.toLocalIterator().foreach(dataset -> {
                $anonfun$run$6(streamRDFDeferred, dataset);
                return BoxedUnit.UNIT;
            });
            streamRDFDeferred.finish();
            openStdOutWithCloseShield.flush();
        } else {
            // NOTE(review): a fresh, empty prefix mapping is passed here, so the prefixes loaded
            // from outPrefixes above are not handed to this writer - confirm this is intended.
            net.sansa_stack.rdf.spark.io.package$.MODULE$.JenaDatasetWriter(groupNamedGraphsByGraphIri).configureSave().setGlobalPrefixMapping(new PrefixMappingImpl()).setOutputFormat(findRdfFormat).setMapQuadsToTriplesForTripleLangs(true).setPartitionFolder(cmdSansaTrigMerge.outFolder).setTargetFile(cmdSansaTrigMerge.outFile).setDeletePartitionFolderAfterMerge(true).run();
        }

        logger().info("Processing time: {} seconds", createStarted.getTime(TimeUnit.SECONDS));
        return Integer.valueOf(0);
    }

    /**
     * Returns the requested locations that have no counterpart among the validated paths.
     *
     * @param requestedLocations the location strings as supplied by the caller
     * @param validPaths the Hadoop paths that passed validation
     * @return the rejected locations without duplicates, in the order they were requested;
     *     empty when every location is valid
     */
    private static java.util.Set<String> collectInvalidLocations(Iterable<?> requestedLocations, Set validPaths) {
        java.util.Set<String> rejected = new java.util.LinkedHashSet<>();
        for (Object candidate : requestedLocations) {
            String location = String.valueOf(candidate);
            boolean accepted;
            try {
                // A Path built from the same string equals the one created during validation.
                accepted = validPaths.contains(new Path(location));
            } catch (IllegalArgumentException malformed) {
                // Strings that cannot even form a Path can never be among the valid paths.
                accepted = false;
            }
            if (!accepted) {
                rejected.add(location);
            }
        }
        return rejected;
    }

    /**
     * Lambda body used by {@link #run(CmdSansaTrigMerge)} to filter (file system, path) pairs.
     *
     * @param tuple2 a pair holding a {@link FileSystem} and a {@link Path}
     * @return the result of {@code isFile} for the path on its file system
     * @throws MatchError if the pair itself is {@code null}
     */
    public static final /* synthetic */ boolean $anonfun$run$3(Tuple2 tuple2) {
        if (tuple2 != null) {
            return ((FileSystem) tuple2._1()).isFile((Path) tuple2._2());
        }
        throw new MatchError(tuple2);
    }

    /**
     * Lambda body used by {@link #run(CmdSansaTrigMerge)}: sends the dataset's graph to the
     * given stream.
     */
    public static final /* synthetic */ void $anonfun$run$6(StreamRDFDeferred streamRDFDeferred, Dataset dataset) {
        StreamRDFOps.sendDatasetToStream(dataset.asDatasetGraph(), streamRDFDeferred);
    }

    private CmdSansaTrigMergeImpl$() {
        MODULE$ = this;
        this.logger = LoggerFactory.getLogger(getClass());
    }
}
