package net.sansa_stack.spark.cli.impl;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.TimeUnit;
import net.sansa_stack.spark.cli.cmd.CmdSansaNgsSort;
import net.sansa_stack.spark.io.rdf.input.api.RdfSourceFactory;
import net.sansa_stack.spark.io.rdf.input.impl.RdfSourceFactoryImpl;
import net.sansa_stack.spark.io.rdf.output.RddRdfWriterFactory;
import net.sansa_stack.spark.rdd.op.rdf.JavaRddOfDatasetsOps;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:net/sansa_stack/spark/cli/impl/CmdSansaNgsSortImpl.class */
/**
 * Implementation of the "Sansa NGS Sort" command.
 *
 * <p>Loads every input file as an RDD of datasets, unions them, groups the named graphs by
 * graph IRI (optionally sorting / de-duplicating / repartitioning according to the command's
 * options) and hands the result to the configured RDF writer.
 */
public class CmdSansaNgsSortImpl {
    private static final Logger logger = LoggerFactory.getLogger(CmdSansaNgsSortImpl.class);

    /**
     * Executes the sort command described by {@code cmdSansaNgsSort}.
     *
     * @param cmdSansaNgsSort the parsed command; its {@code inputFiles}, {@code outputConfig},
     *     {@code sort}, {@code distinct} and {@code numPartitions} fields are read
     * @return always {@code 0}; failures surface as exceptions rather than as a return code
     * @throws IOException declared for the validation / I/O helpers invoked here
     *     (NOTE(review): which callee actually throws it is not visible in this file)
     */
    // The element type of the dataset RDDs is not visible from this class, so the union is
    // passed along as a raw JavaRDD exactly as before; the list itself is now properly typed.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static int run(CmdSansaNgsSort cmdSansaNgsSort) throws IOException {
        List<String> inputFiles = cmdSansaNgsSort.inputFiles;

        SparkSession sparkSession = CmdUtils.newDefaultSparkSessionBuilder()
                .appName("Sansa NGS Sort (" + inputFiles + ")")
                .getOrCreate();
        Configuration hadoopConf = sparkSession.sparkContext().hadoopConfiguration();

        // Validate the output configuration and the input paths before any RDD is set up.
        RddRdfWriterFactory writerFactory = CmdUtils.configureWriter(cmdSansaNgsSort.outputConfig);
        writerFactory.validate();
        CmdUtils.validatePaths(inputFiles, hadoopConf);

        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkSession.sparkContext());
        RdfSourceFactory sourceFactory = RdfSourceFactoryImpl.from(sparkSession);

        // One RDD of datasets per input file.
        List<JavaRDD<?>> inputRdds = new ArrayList<>(inputFiles.size());
        for (String inputFile : inputFiles) {
            inputRdds.add(sourceFactory.get(inputFile).asDatasets().toJavaRDD());
        }

        // Timing covers the union, the grouping and the write.
        StopWatch stopWatch = StopWatch.createStarted();
        JavaRDD unionRdd = javaSparkContext.union(inputRdds.toArray(new JavaRDD[0]));
        writerFactory.forDataset(
                JavaRddOfDatasetsOps.groupNamedGraphsByGraphIri(
                        unionRdd,
                        cmdSansaNgsSort.sort,
                        cmdSansaNgsSort.distinct,
                        cmdSansaNgsSort.numPartitions))
                .run();

        logger.info("Processing time: {} seconds", stopWatch.getTime(TimeUnit.SECONDS));
        return 0;
    }
}
