package net.sansa_stack.hadoop.format.commons_csv.csv;

import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Streams;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;
import net.sansa_stack.hadoop.core.Accumulating;
import net.sansa_stack.hadoop.core.RecordReaderGenericBase;
import net.sansa_stack.hadoop.core.pattern.CustomPattern;
import net.sansa_stack.hadoop.core.pattern.CustomPatternJava;
import net.sansa_stack.hadoop.format.jena.base.RecordReaderConf;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:net/sansa_stack/hadoop/format/commons_csv/csv/RecordReaderCsv.class */
public class RecordReaderCsv extends RecordReaderGenericBase<List, List, List, List> {
    // Class-wide SLF4J logger; not referenced by any code visible in this file.
    private static final Logger logger = LoggerFactory.getLogger(RecordReaderCsv.class);
    // The three RECORD_* key strings below are the values the no-argument constructor hands to
    // RecordReaderConf (as 1st, 2nd and 3rd argument respectively). How RecordReaderConf
    // interprets them is not visible here — presumably minimum record length, maximum record
    // length and number of probe records; confirm against RecordReaderConf.
    public static final String RECORD_MINLENGTH_KEY = "mapreduce.input.csv.record.minlength";
    public static final String RECORD_MAXLENGTH_KEY = "mapreduce.input.csv.record.maxlength";
    public static final String RECORD_PROBECOUNT_KEY = "mapreduce.input.csv.record.probecount";
    // Not read anywhere in this file; presumably consumed by FileInputFormatCsv when it
    // resolves the CSVFormat from the job configuration — TODO confirm.
    public static final String CSV_FORMAT_RAW_KEY = "mapreduce.input.csv.format.raw";
    // Configuration key whose long value initialize() feeds into
    // createStartOfCsvRecordPattern(long) as the repetition bound of the record-start regex.
    public static final String CELL_MAXLENGTH_KEY = "mapreduce.input.csv.cell.maxlength";
    // Fallback used by initialize() when CELL_MAXLENGTH_KEY is absent from the configuration.
    public static final long CELL_MAXLENGTH_DEFAULT_VALUE = 300000;
    // Format obtained from FileInputFormatCsv.getCsvFormat(...) in initialize(); its
    // skip-header flag is consulted by createRecordFlow().
    protected CSVFormat requestedCsvFormat;
    // requestedCsvFormat with header skipping switched off (see disableSkipHeaderRecord);
    // this is the format actually handed to the CSV parser.
    protected CSVFormat effectiveCsvFormat;

    /* loaded from: input_file:net/sansa_stack/hadoop/format/commons_csv/csv/RecordReaderCsv$State.class */
    private static class State extends AbstractIterator<List> implements AutoCloseable {
        public Reader reader;
        public CSVFormat effectiveCsvFormat;
        public Iterator<CSVRecord> iterator;
        public CSVParser csvParser = null;
        public long seenRowLength = -1;
        public List<String> priorRow = null;
        public long counter = 0;

        State(Reader reader, CSVFormat cSVFormat) {
            this.reader = reader;
            this.effectiveCsvFormat = cSVFormat;
        }

        /* JADX INFO: Access modifiers changed from: protected */
        /* renamed from: computeNext, reason: merged with bridge method [inline-methods] */
        public List m5computeNext() {
            List<String> list;
            Iterator<CSVRecord> it = this.iterator;
            if (it == null) {
                try {
                    this.csvParser = new CSVParser(this.reader, this.effectiveCsvFormat);
                    Iterator<CSVRecord> it2 = this.csvParser.iterator();
                    this.iterator = it2;
                    it = it2;
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
            if (it.hasNext()) {
                List<String> list2 = it.next().toList();
                this.counter++;
                long size = list2.size();
                if (this.seenRowLength == -1) {
                    this.seenRowLength = size;
                } else if (size != this.seenRowLength) {
                    throw new IllegalStateException(String.format("At row %d: Current row length (%d) does not match prior one (%d) - current: %s | prior: %s", Long.valueOf(this.counter), Long.valueOf(size), Long.valueOf(this.seenRowLength), list2, this.priorRow));
                }
                this.priorRow = list2;
                list = list2;
            } else {
                list = (List) endOfData();
            }
            return list;
        }

        @Override // java.lang.AutoCloseable
        public void close() {
            try {
                if (this.csvParser != null) {
                    this.csvParser.close();
                } else {
                    this.reader.close();
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Builds the pattern used to locate candidate record starts in raw CSV text.
     *
     * <p>The expression matches a single character that directly follows a line feed, unless
     * a forward scan from that position finds, after at most {@code j} characters, a double
     * quote that is followed by a line break, a comma or {@code $}. During that scan no
     * consumed character may stand immediately after the three-character sequence
     * "non-quote, quote, non-quote". Presumably this rejects line starts that lie inside a
     * quoted multi-line cell; treat that reading as a heuristic interpretation.
     *
     * @param j upper bound of the look-ahead repetition; {@link #initialize} derives it from
     *          the cell max-length setting
     * @return the compiled pattern
     */
    public static CustomPattern createStartOfCsvRecordPattern(long j) {
        // 32 equals java.util.regex.Pattern.DOTALL; presumably CustomPatternJava forwards it
        // as java.util.regex flags — confirm against CustomPatternJava.compile.
        final int flags = 32;

        StringBuilder regex = new StringBuilder();
        regex.append("(?<=\n(?!((?<![^\"]\"[^\"]).){0,");
        regex.append(j);
        regex.append("}\"(\r?\n|,|$))).");

        return CustomPatternJava.compile(regex.toString(), flags);
    }

    /**
     * Creates a reader configured with this class's default configuration keys
     * ({@link #RECORD_MINLENGTH_KEY}, {@link #RECORD_MAXLENGTH_KEY},
     * {@link #RECORD_PROBECOUNT_KEY}). The fourth {@link RecordReaderConf} argument is
     * {@code null}; the record-start pattern is assigned later in {@link #initialize}.
     */
    public RecordReaderCsv() {
        // Use the declared constants rather than repeating the key strings.
        this(new RecordReaderConf(RECORD_MINLENGTH_KEY, RECORD_MAXLENGTH_KEY, RECORD_PROBECOUNT_KEY, null));
    }

    /**
     * Creates a reader from an explicit configuration descriptor.
     *
     * <p>Delegates to the base class together with {@code Accumulating.identity()}
     * (presumably an accumulator that passes each parsed row through unchanged — confirm
     * against {@code Accumulating}).
     *
     * @param recordReaderConf configuration descriptor forwarded to the base class
     */
    public RecordReaderCsv(RecordReaderConf recordReaderConf) {
        super(recordReaderConf, Accumulating.identity());
    }

    /**
     * Initializes the reader for a split and derives the CSV-specific settings from the job
     * configuration.
     *
     * <p>After the base-class initialization this sets:
     * <ul>
     *   <li>the record-start pattern, bounded by the {@link #CELL_MAXLENGTH_KEY} setting
     *       (default {@link #CELL_MAXLENGTH_DEFAULT_VALUE});</li>
     *   <li>{@link #requestedCsvFormat}, resolved via {@code FileInputFormatCsv.getCsvFormat}
     *       with {@code CSVFormat.EXCEL} as the fallback argument;</li>
     *   <li>{@link #effectiveCsvFormat}, the requested format with header skipping disabled.</li>
     * </ul>
     *
     * @param inputSplit         the split to read
     * @param taskAttemptContext context supplying the job configuration
     * @throws IOException if the base-class initialization fails
     */
    @Override // net.sansa_stack.hadoop.core.RecordReaderGenericBase
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
        super.initialize(inputSplit, taskAttemptContext);
        Configuration configuration = taskAttemptContext.getConfiguration();

        // Use the declared key/default constants rather than repeating their literal values.
        long cellMaxLength = configuration.getLong(CELL_MAXLENGTH_KEY, CELL_MAXLENGTH_DEFAULT_VALUE);
        this.recordStartPattern = createStartOfCsvRecordPattern(cellMaxLength);

        this.requestedCsvFormat = FileInputFormatCsv.getCsvFormat(configuration, CSVFormat.EXCEL);
        // The parser itself must never skip a header; createRecordFlow() handles that instead.
        this.effectiveCsvFormat = disableSkipHeaderRecord(this.requestedCsvFormat);
    }

    /**
     * Derives a format that is identical to the given one except that skipping of the header
     * record is turned off.
     *
     * @param cSVFormat the format to copy
     * @return a new format built from {@code cSVFormat} with {@code skipHeaderRecord == false}
     */
    protected CSVFormat disableSkipHeaderRecord(CSVFormat cSVFormat) {
        CSVFormat.Builder builder = CSVFormat.Builder.create(cSVFormat);
        builder.setSkipHeaderRecord(false);
        return builder.build();
    }

    /**
     * Creates a CSV parser over the given reader using {@link #effectiveCsvFormat}.
     *
     * @param reader character stream to parse
     * @return a new parser bound to {@code reader}
     * @throws IOException if the parser cannot be created
     */
    protected CSVParser newCsvParser(Reader reader) throws IOException {
        final CSVFormat format = this.effectiveCsvFormat;
        return new CSVParser(reader, format);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the base-class record flow, minus its first record when a header has to be
     * skipped.
     *
     * <p>The first record is dropped only if the requested format asks for header skipping
     * and the inherited {@code isFirstSplit} flag is set. The parser never skips the header
     * itself because {@link #initialize} disables that on the effective format.
     *
     * @return the stream of rows for this reader
     * @throws IOException if the base class fails to create the flow
     */
    @Override // net.sansa_stack.hadoop.core.RecordReaderGenericBase
    public Stream<List> createRecordFlow() throws IOException {
        Stream<List> flow = super.createRecordFlow();
        boolean dropHeader = this.requestedCsvFormat.getSkipHeaderRecord() && this.isFirstSplit;
        return dropHeader ? flow.skip(1L) : flow;
    }

    /* JADX WARN: Type inference failed for: r0v0, types: [java.util.Iterator, net.sansa_stack.hadoop.format.commons_csv.csv.RecordReaderCsv$State, java.lang.Object] */
    @Override // net.sansa_stack.hadoop.core.RecordReaderGenericBase
    protected Stream<List> parse(InputStream inputStream, boolean z) {
        ?? state = new State(new InputStreamReader(inputStream), this.effectiveCsvFormat);
        Stream stream = Streams.stream((Iterator) state);
        Objects.requireNonNull(state);
        return (Stream) stream.onClose(state::close);
    }
}
