package net.sansa_stack.hadoop.jena.trash;

import com.google.common.base.StandardSystemProperty;
import io.reactivex.rxjava3.core.Flowable;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.io.StringReader;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.file.CopyOption;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.AbstractMap;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sansa_stack.hadoop.generic.RecordReaderGenericBase;
import net.sansa_stack.hadoop.jena.rdf.trig.RecordReaderTrigDataset;
import net.sansa_stack.hadoop.util.InputStreamWithCloseIgnore;
import net.sansa_stack.hadoop.util.InputStreamWithCloseLogging;
import net.sansa_stack.hadoop.util.InterruptingReadableByteChannel;
import net.sansa_stack.hadoop.util.ReadableByteChannelWithConditionalBound;
import net.sansa_stack.hadoop.util.ReadableByteChannelWithoutCloseOnInterrupt;
import net.sansa_stack.hadoop.util.SeekableInputStream;
import org.aksw.jena_sparql_api.io.binseach.BufferFromInputStream;
import org.aksw.jena_sparql_api.io.binseach.CharSequenceFromSeekable;
import org.aksw.jena_sparql_api.io.binseach.Seekable;
import org.aksw.jena_sparql_api.rx.RDFDataMgrRx;
import org.apache.commons.io.input.BoundedInputStream;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.SplitCompressionInputStream;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.jena.ext.com.google.common.primitives.Ints;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.DatasetFactory;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.RDFFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:net/sansa_stack/hadoop/jena/trash/TrigRecordReaderOld.class */
public class TrigRecordReaderOld extends RecordReader<LongWritable, Dataset> {
    // Upper bound (in bytes) of the region searched for a record start; read from MAX_RECORD_LENGTH in initialize().
    protected long maxRecordLength;
    // Added to the first found record position before searching for the second one (see skipOverNextRecord).
    protected long minRecordLength;
    // Maximum number of datasets parsed when probing a candidate record start.
    protected int probeRecordCount;
    // Lazily created in nextKeyValue() via initDatasetFlow().
    protected Iterator<Dataset> datasetFlow;
    // Most recently obtained decompressor; handed back to the CodecPool in close().
    protected Decompressor decompressor;
    protected FileSplit split;
    // Codec resolved from the split's path; null when no codec matches.
    protected CompressionCodec codec;
    // Turtle serialization of the model parsed from the "prefixes" configuration value; prepended to parsed data.
    protected byte[] prefixBytes;
    // Stream opened on the split's file in initialize(); closed in close().
    protected FSDataInputStream rawStream;
    // Stream for the interval most recently selected by setStreamToInterval().
    protected SeekableInputStream stream;
    // NOTE(review): the logger is created for RecordReaderTrigDataset rather than this class - confirm this is intended.
    private static final Logger logger = LoggerFactory.getLogger(RecordReaderTrigDataset.class);
    // Configuration keys read in initialize().
    public static String MAX_RECORD_LENGTH = "mapreduce.input.trigrecordreader.record.maxlength";
    public static String MIN_RECORD_LENGTH = "mapreduce.input.trigrecordreader.record.minlength";
    public static String PROBE_RECORD_COUNT = "mapreduce.input.trigrecordreader.probe.count";
    // Candidate record starts: base/prefix directives or the opening of a (named) graph block.
    // Flags value 10 == Pattern.CASE_INSENSITIVE (2) | Pattern.MULTILINE (8).
    protected static final Pattern trigFwdPattern = Pattern.compile("@?base|@?prefix|(graph\\s*)?(<[^>]*>|_?:[^-\\s]+)\\s*\\{", 10);
    // Not referenced by any method visible in this class.
    protected static final Dataset EMPTY_DATASET = DatasetFactory.create();
    // Incremented once per dataset returned by nextKeyValue().
    protected AtomicLong currentKey = new AtomicLong();
    protected Dataset currentValue = DatasetFactory.create();
    // True when a (splittable) compression codec applies to the split's file.
    protected boolean isEncoded = false;
    // Split boundaries as reported by the FileSplit; -1 until initialize() has run.
    protected long splitStart = -1;
    protected long splitLength = -1;
    protected long splitEnd = -1;

    /**
     * Reads the reader settings from the task configuration, records the split boundaries,
     * resolves the compression codec for the split's file and opens the raw input stream.
     *
     * <p>The codec is resolved and validated <em>before</em> the file is opened, so that an
     * unsupported (non-splittable) codec does not leave an open stream behind when the
     * exception is thrown.
     *
     * @param inputSplit the split to read; must be a {@link FileSplit}
     * @param taskAttemptContext supplies the configuration
     * @throws IOException if the file system cannot be obtained or the file cannot be opened
     * @throws RuntimeException if a codec applies to the file that is not a
     *         {@link SplittableCompressionCodec}
     */
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
        Configuration configuration = taskAttemptContext.getConfiguration();
        this.maxRecordLength = configuration.getInt(MAX_RECORD_LENGTH, 10485760);
        this.minRecordLength = configuration.getInt(MIN_RECORD_LENGTH, 1);
        this.probeRecordCount = configuration.getInt(PROBE_RECORD_COUNT, 10);

        // Round-trip the configured prefixes through a model to obtain a Turtle prefix header.
        String prefixes = configuration.get("prefixes");
        Model prefixModel = ModelFactory.createDefaultModel();
        if (prefixes != null) {
            RDFDataMgr.read(prefixModel, new StringReader(prefixes), (String) null, Lang.TURTLE);
        }
        ByteArrayOutputStream prefixOut = new ByteArrayOutputStream();
        RDFDataMgr.write(prefixOut, prefixModel, RDFFormat.TURTLE_PRETTY);
        this.prefixBytes = prefixOut.toByteArray();

        this.split = (FileSplit) inputSplit;
        Path path = this.split.getPath();
        this.splitStart = this.split.getStart();
        this.splitLength = this.split.getLength();
        this.splitEnd = this.splitStart + this.splitLength;

        // Validate the codec first: failing here must not leak an already opened stream.
        this.codec = new CompressionCodecFactory(configuration).getCodec(path);
        this.isEncoded = false;
        if (null != this.codec) {
            if (!(this.codec instanceof SplittableCompressionCodec)) {
                throw new RuntimeException("Don't know how to handle codec: " + this.codec);
            }
            this.isEncoded = true;
        }

        this.rawStream = path.getFileSystem(configuration).open(path);
    }

    /**
     * Builds the dataset flow for this split and exposes it as a blocking iterator
     * stored in {@code datasetFlow}.
     *
     * @throws IOException if creating the flow fails
     */
    public void initDatasetFlow() throws IOException {
        Flowable<Dataset> flow = createDatasetFlow();
        Iterable<Dataset> blockingView = flow.blockingIterable();
        this.datasetFlow = blockingView.iterator();
    }

    /**
     * Points {@code stream} at the byte interval {@code [j, j2)} of the underlying file.
     *
     * <p>Without a codec the raw stream is seeked to {@code j} and wrapped in a channel that is
     * given {@code j2} as its end; the requested bounds are returned unchanged. With a splittable
     * codec a block-wise decoding stream is created and the codec's adjusted start/end are returned.
     *
     * @param j requested start offset
     * @param j2 requested end offset
     * @return entry of (effective start, effective end)
     * @throws IOException if seeking or creating the decoding stream fails
     * @throws RuntimeException if the codec is not a {@link SplittableCompressionCodec}
     */
    public Map.Entry<Long, Long> setStreamToInterval(long j, long j2) throws IOException {
        if (null == this.codec) {
            this.rawStream.seek(j);
            this.stream = new SeekableInputStream(new InputStreamWithCloseIgnore(Channels.newInputStream(new InterruptingReadableByteChannel(InputStreamWithCloseLogging.wrap(this.rawStream, ExceptionUtils::getStackTrace, RecordReaderGenericBase::logUnexpectedClose), this.rawStream, j2))), this.rawStream);
            return new AbstractMap.SimpleEntry<Long, Long>(Long.valueOf(j), Long.valueOf(j2));
        }

        // Check the codec type before taking a decompressor from the pool.
        if (!(this.codec instanceof SplittableCompressionCodec)) {
            throw new RuntimeException("Don't know how to handle codec: " + this.codec);
        }
        SplittableCompressionCodec splittableCodec = (SplittableCompressionCodec) this.codec;

        // NOTE(review): a previously obtained decompressor is intentionally NOT returned to the pool here;
        // a stream created by an earlier call may still be read through it. Only the decompressor obtained
        // last is returned in close() - confirm whether earlier ones should be tracked and returned as well.
        this.decompressor = CodecPool.getDecompressor(this.codec);

        SplitCompressionInputStream decodedStream = splittableCodec.createInputStream(this.rawStream, this.decompressor, j, j2, SplittableCompressionCodec.READ_MODE.BYBLOCK);
        long adjustedStart = decodedStream.getAdjustedStart();
        long adjustedEnd = decodedStream.getAdjustedEnd();
        this.stream = new SeekableInputStream(new InputStreamWithCloseIgnore(InputStreamWithCloseLogging.wrap(decodedStream, ExceptionUtils::getStackTrace, RecordReaderGenericBase::logUnexpectedClose)), decodedStream);
        return new AbstractMap.SimpleEntry<Long, Long>(Long.valueOf(adjustedStart), Long.valueOf(adjustedEnd));
    }

    /**
     * Callback for a stream close event; intentionally does nothing.
     *
     * @param str message describing the close event (ignored)
     */
    public static void logClose(String str) {
    }

    /**
     * Callback for a stream close that should not have happened: logs the given
     * message at error level and then fails.
     *
     * @param str message to log
     * @throws RuntimeException always
     */
    public static void logUnexpectedClose(String str) {
        logger.error(str);
        final String reason = "Unexpected close";
        throw new RuntimeException(reason);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v129, types: [java.io.InputStream] */
    protected Flowable<Dataset> createDatasetFlow() throws IOException {
        InputStream boundedInputStream;
        InputStream boundedInputStream2;
        Function function = seekable -> {
            return new SequenceInputStream(new ByteArrayInputStream(this.prefixBytes), Channels.newInputStream((ReadableByteChannel) seekable.cloneObject()));
        };
        Function function2 = seekable2 -> {
            return RDFDataMgrRx.createFlowableDatasets(() -> {
                return (InputStream) function.apply(seekable2);
            }, Lang.TRIG, (String) null);
        };
        dataset -> {
            return !dataset.isEmpty();
        };
        Predicate<Seekable> predicate = seekable3 -> {
            return ((Long) ((Flowable) function2.apply(seekable3)).take((long) this.probeRecordCount).count().onErrorReturnItem(-1L).blockingGet()).longValue() > 0;
        };
        BiPredicate biPredicate = (seekableInputStream, l) -> {
            try {
                long pos = seekableInputStream.getSeekable().getPos() - l.longValue();
                boolean z = pos >= 0;
                if (z) {
                    logger.warn("Exceeded maximum boundary by " + pos + " bytes");
                }
                return z;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        };
        long j = (2 + this.probeRecordCount) * this.maxRecordLength;
        long longValue = setStreamToInterval(this.splitEnd, this.splitEnd + j).getKey().longValue();
        BufferFromInputStream create = BufferFromInputStream.create(new BoundedInputStream(this.stream, j), 1048576, new int[0]);
        long checkedCast = skipOverNextRecord(create.newChannel(), 0L, 0L, this.maxRecordLength, j, l2 -> {
            return true;
        }, predicate) < 0 ? 0L : Ints.checkedCast(r0);
        long[] jArr = {-1};
        if (!this.isEncoded) {
            jArr[0] = this.splitLength;
        }
        long longValue2 = setStreamToInterval(this.splitStart, longValue).getKey().longValue();
        Predicate<Long> predicate2 = l3 -> {
            return jArr[0] < 0 || l3.longValue() < jArr[0];
        };
        BufferFromInputStream create2 = BufferFromInputStream.create(new BoundedInputStream(Channels.newInputStream(new ReadableByteChannelWithConditionalBound(new ReadableByteChannelWithoutCloseOnInterrupt(this.stream), readableByteChannelWithConditionalBound -> {
            if (jArr[0] >= 0 || !biPredicate.test(new SeekableInputStream(this.rawStream, this.rawStream), Long.valueOf(longValue))) {
                return false;
            }
            jArr[0] = readableByteChannelWithConditionalBound.getBytesRead();
            logger.info("Head stream encountered split end; decoded data length = " + jArr[0]);
            return false;
        })), j), 1048576, new int[0]);
        int checkedCast2 = this.splitStart == 0 ? 0 : Ints.checkedCast(skipOverNextRecord(create2.newChannel(), 0L, 0L, this.maxRecordLength, j, predicate2, predicate));
        InputStream newInputStream = Channels.newInputStream(new ReadableByteChannelWithConditionalBound(new ReadableByteChannelWithoutCloseOnInterrupt(this.stream), readableByteChannelWithConditionalBound2 -> {
            return biPredicate.test(this.stream, Long.valueOf(longValue));
        }));
        ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(this.prefixBytes);
        if (checkedCast2 < 0) {
            logger.warn(String.format("No data found in split starting at %d. Possible reasons: (1) A large record spanning this split, (2) Syntax error(s) in the data,(3) Some bug in the implementation", Long.valueOf(this.splitStart)));
            boundedInputStream = new ByteArrayInputStream(new byte[0]);
            newInputStream = new ByteArrayInputStream(new byte[0]);
            boundedInputStream2 = new ByteArrayInputStream(new byte[0]);
        } else {
            long knownDataSize = jArr[0] < 0 ? create2.getKnownDataSize() : jArr[0];
            Seekable newChannel = create2.newChannel();
            if (checkedCast2 != 0) {
                newChannel.nextPos(checkedCast2);
            }
            boundedInputStream = new BoundedInputStream(Channels.newInputStream((ReadableByteChannel) newChannel), knownDataSize - checkedCast2);
            if (!this.isEncoded) {
                long knownDataSize2 = longValue2 + create2.getKnownDataSize();
                if (knownDataSize2 != this.stream.getPos()) {
                    throw new RuntimeException("Expected body offset does not match actual one: adjustedSplitStart = " + longValue2 + " known head buffer size = " + create2.getKnownDataSize() + ", expected body offset = " + knownDataSize2 + ", actual body offset = " + this.stream.getPos());
                }
            }
            int i = this.isEncoded ? 1 : 0;
            Seekable newChannel2 = create.newChannel();
            newChannel2.nextPos(i);
            boundedInputStream2 = new BoundedInputStream(Channels.newInputStream((ReadableByteChannel) newChannel2), checkedCast - i);
        }
        if (0 != 0) {
            String name = this.split.getPath().getName();
            java.nio.file.Path absolutePath = Paths.get(StandardSystemProperty.JAVA_IO_TMPDIR.value(), new String[0]).toAbsolutePath();
            logger.info("Writing segment " + name + " " + this.splitStart + " to " + absolutePath);
            java.nio.file.Path resolve = absolutePath.resolve(name + "_" + this.splitStart + ".prefix.trig");
            java.nio.file.Path resolve2 = absolutePath.resolve(name + "_" + this.splitStart + ".head.trig");
            java.nio.file.Path resolve3 = absolutePath.resolve(name + "_" + this.splitStart + ".body.trig");
            java.nio.file.Path resolve4 = absolutePath.resolve(name + "_" + this.splitStart + ".tail.trig");
            Files.copy(byteArrayInputStream, resolve, new CopyOption[0]);
            Files.copy(boundedInputStream, resolve2, new CopyOption[0]);
            Files.copy(newInputStream, resolve3, new CopyOption[0]);
            Files.copy(boundedInputStream2, resolve4, new CopyOption[0]);
            byteArrayInputStream = Files.newInputStream(resolve, StandardOpenOption.READ);
            boundedInputStream = Files.newInputStream(resolve2, StandardOpenOption.READ);
            newInputStream = Files.newInputStream(resolve3, StandardOpenOption.READ);
            boundedInputStream2 = Files.newInputStream(resolve4, StandardOpenOption.READ);
        }
        InputStream wrap = InputStreamWithCloseLogging.wrap(new SequenceInputStream(Collections.enumeration(Arrays.asList(byteArrayInputStream, boundedInputStream, newInputStream, boundedInputStream2))), ExceptionUtils::getStackTrace, RecordReaderGenericBase::logClose);
        return checkedCast2 >= 0 ? RDFDataMgrRx.createFlowableDatasets(() -> {
            return wrap;
        }, Lang.TRIG, (String) null) : Flowable.empty();
    }

    /**
     * Searches for the first probe-confirmed record start within a bounded region.
     *
     * <p>A clone of {@code seekable} is positioned at {@code j2 - j}, and the record pattern is
     * matched over at most {@code min(j2 + j3, j4) - j2} characters from there.
     *
     * @param seekable source to search; only a clone is repositioned
     * @param j offset subtracted when positioning the clone and added back to a found position
     * @param j2 position at which the search starts
     * @param j3 maximum length of the search region
     * @param j4 upper limit for the end of the search region
     * @param predicate validator for candidate positions
     * @param predicate2 probe deciding whether a candidate is a record start
     * @return the found position plus {@code j}, or {@code -1} if none was found
     * @throws IOException if accessing the seekable fails
     */
    public static long findNextRecord(Seekable seekable, long j, long j2, long j3, long j4, Predicate<Long> predicate, Predicate<Seekable> predicate2) throws IOException {
        long regionEnd = Math.min(j2 + j3, j4);
        int regionLength = Ints.checkedCast(regionEnd - j2);

        Seekable cursor = seekable.cloneObject();
        cursor.setPos(j2 - j);

        Matcher candidates = trigFwdPattern.matcher(new CharSequenceFromSeekable(cursor));
        candidates.region(0, regionLength);

        long hit = findFirstPositionWithProbeSuccess(cursor, predicate, candidates, true, predicate2);
        if (hit < 0) {
            return -1L;
        }
        return hit + j;
    }

    /**
     * Runs up to two consecutive record searches and returns the position of the last one that
     * succeeded. The second search starts {@code minRecordLength} after the first found position.
     *
     * @param seekable source to search
     * @param j offset passed through to {@link #findNextRecord}
     * @param j2 position at which the first search starts
     * @param j3 maximum length of each search region
     * @param j4 upper limit for the end of a search region
     * @param predicate validator for candidate positions
     * @param predicate2 probe deciding whether a candidate is a record start
     * @return position of the last record found, or {@code -1} if the first search already failed
     * @throws IOException if accessing the seekable fails
     */
    long skipOverNextRecord(Seekable seekable, long j, long j2, long j3, long j4, Predicate<Long> predicate, Predicate<Seekable> predicate2) throws IOException {
        final long availableBytes = j4 - j2;
        long lastFoundPos = -1;
        long searchStart = j2;

        for (int attempt = 0; attempt < 2; attempt++) {
            long recordPos = findNextRecord(seekable, j, searchStart, j3, j4, predicate, predicate2);
            if (recordPos < 0) {
                if (availableBytes < j3) {
                    logger.warn("No more records found after pos " + (j + searchStart));
                } else {
                    logger.warn("Found no record start in a record search region of " + j3 + " bytes, although up to " + availableBytes + " bytes were allowed for reading");
                }
                break;
            }
            lastFoundPos = recordPos;
            if (attempt == 0) {
                // Continue behind the first hit so the second search cannot find it again.
                searchStart = recordPos + this.minRecordLength;
            }
        }
        return lastFoundPos;
    }

    /**
     * Walks the matcher's hits and returns the first candidate position accepted by the probe.
     *
     * <p>Each candidate is computed relative to the position of {@code seekable} at call time:
     * the match start when {@code z} is true, otherwise {@code -matcher.end() + 1}. The walk stops
     * without a result as soon as {@code predicate} rejects a candidate position.
     *
     * @param seekable source whose current position is the base; only clones are repositioned
     * @param predicate validator for candidate positions; a rejection aborts the search
     * @param matcher matcher yielding candidates
     * @param z selects how a match is turned into an offset (see above)
     * @param predicate2 probe invoked on a clone positioned at the candidate
     * @return the accepted position, or {@code -1} if there is none
     * @throws IOException if accessing the seekable fails
     */
    public static long findFirstPositionWithProbeSuccess(Seekable seekable, Predicate<Long> predicate, Matcher matcher, boolean z, Predicate<Seekable> predicate2) throws IOException {
        Seekable probeCursor = seekable.cloneObject();
        long basePos = probeCursor.getPos();

        while (matcher.find()) {
            int offset = z ? matcher.start() : (-matcher.end()) + 1;
            int candidate = Ints.checkedCast(basePos + offset);
            if (!predicate.test(Long.valueOf(candidate))) {
                return -1;
            }
            probeCursor.setPos(candidate);
            if (predicate2.test(probeCursor.cloneObject())) {
                return candidate;
            }
        }
        return -1;
    }

    /**
     * Advances to the next dataset, creating the dataset flow on first use.
     *
     * @return {@code true} if a non-null dataset was read; {@code false} if the flow is exhausted
     *         (the current value is left untouched in that case) or the next item was null
     * @throws IOException if creating the dataset flow fails
     */
    public boolean nextKeyValue() throws IOException {
        if (this.datasetFlow == null) {
            initDatasetFlow();
        }
        if (!this.datasetFlow.hasNext()) {
            return false;
        }
        this.currentValue = this.datasetFlow.next();
        this.currentKey.incrementAndGet();
        return this.currentValue != null;
    }

    /* renamed from: getCurrentKey, reason: merged with bridge method [inline-methods] */
    /**
     * Returns the current key: the running record counter wrapped in a fresh writable.
     *
     * @return the key, or {@code null} when there is no current value
     */
    public LongWritable m9getCurrentKey() {
        if (this.currentValue == null) {
            return null;
        }
        long key = this.currentKey.get();
        return new LongWritable(key);
    }

    /* renamed from: getCurrentValue, reason: merged with bridge method [inline-methods] */
    /**
     * Returns the dataset most recently read by {@code nextKeyValue()}.
     *
     * @return the current dataset; may be {@code null} if the flow produced a null item
     */
    public Dataset m8getCurrentValue() {
        return this.currentValue;
    }

    /**
     * Reports progress as the fraction of the split's byte range that the raw stream's
     * position has passed, capped at 1.
     *
     * @return {@code 0} for an empty split, otherwise a value of at most {@code 1}
     * @throws IOException if the raw stream's position cannot be read
     */
    public float getProgress() throws IOException {
        if (this.splitStart == this.splitEnd) {
            return 0.0f;
        }
        float consumed = (float) (this.rawStream.getPos() - this.splitStart);
        float total = (float) (this.splitEnd - this.splitStart);
        return Math.min(1.0f, consumed / total);
    }

    /**
     * Closes the raw stream and hands the current decompressor back to the codec pool.
     * The decompressor is returned even if closing the stream fails.
     *
     * @throws IOException if closing the raw stream fails
     */
    public void close() throws IOException {
        final FSDataInputStream openStream = this.rawStream;
        try {
            if (openStream != null) {
                openStream.close();
                // Only cleared after a successful close.
                this.rawStream = null;
            }
        } finally {
            final Decompressor pooled = this.decompressor;
            if (pooled != null) {
                CodecPool.returnDecompressor(pooled);
                this.decompressor = null;
            }
        }
    }
}
