package org.aksw.simba.topicmodeling.io.reuters;

import java.io.File;
import java.io.IOException;
import java.util.Deque;
import java.util.LinkedList;
import org.aksw.simba.topicmodeling.preprocessing.docsupplier.AbstractDocumentSupplier;
import org.aksw.simba.topicmodeling.utils.doc.Document;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/simba/topicmodeling/io/reuters/ReutersSgmlReader.class */
/**
 * Document supplier that walks through the numbered SGML segment files
 * {@code reut2-000.sgm} to {@code reut2-021.sgm} below a fixed corpus path and
 * hands out the documents created from them one by one.
 *
 * <p>
 * A segment file is only loaded when the internal queue has run empty. Its
 * whole content is passed to a {@link ReutersDocumentCreator} that was
 * constructed with the queue (presumably it appends the created documents to
 * that queue - confirm against {@code ReutersDocumentCreator}).
 * </p>
 */
public class ReutersSgmlReader extends AbstractDocumentSupplier {
    private static final Logger LOGGER = LoggerFactory.getLogger(ReutersSgmlReader.class);

    /**
     * Common prefix of all segment files. The two-digit segment id and
     * {@link #REUTERS_FILE_ENDING} are appended to form the file name.
     */
    private static final String REUTERS_PATH = "/data/m.roeder/daten/Corpora/Reuters-21578/reut2-0";
    private static final String REUTERS_FILE_ENDING = ".sgm";

    /** Id of the last segment that is read (inclusive). */
    private static final int MAX_SEGMENT_ID = 21;

    /** Documents that are waiting to be handed out by {@link #getNextDocument()}. */
    private final Deque<Document> queue = new LinkedList<Document>();
    private final ReutersDocumentCreator documentCreator = new ReutersDocumentCreator(this.queue);

    /** Id of the segment that will be loaded next; starts at 0. */
    private int nextSegmentId;

    /**
     * Returns the next queued document, loading further segments as needed.
     *
     * <p>
     * Segments that do not contribute any document are skipped instead of
     * ending the stream early.
     * </p>
     *
     * @return the next document, or {@code null} if all segments have been
     *         consumed or the current segment could not be read
     */
    public Document getNextDocument() {
        Document next = this.queue.poll();
        // Keep loading segments until one of them yields a document; stop as
        // soon as no further segment can be read.
        while (next == null && readNextSegment()) {
            next = this.queue.poll();
        }
        return next;
    }

    /**
     * Loads the segment identified by {@link #nextSegmentId} and passes its
     * content to the document creator. The id is advanced only after the
     * segment has been processed, so a failed segment is tried again on the
     * next call.
     *
     * @return {@code true} if a segment has been processed, {@code false} if
     *         no segment is left or the segment could not be read
     */
    private boolean readNextSegment() {
        if (this.nextSegmentId > MAX_SEGMENT_ID) {
            return false;
        }
        // Segment ids are always written with two digits (00 - 21).
        String segmentNumber = (this.nextSegmentId < 10 ? "0" : "") + this.nextSegmentId;
        File segmentFile = new File(REUTERS_PATH + segmentNumber + REUTERS_FILE_ENDING);
        try {
            // NOTE(review): this overload decodes with the platform default
            // charset - confirm the encoding of the corpus files and pass it
            // explicitly.
            this.documentCreator.createDocuments(FileUtils.readFileToString(segmentFile));
        } catch (IOException e) {
            // The logger already records the stack trace; no additional
            // printStackTrace() is needed.
            LOGGER.error("Couldn't read segment " + this.nextSegmentId, e);
            return false;
        }
        this.nextSegmentId++;
        return true;
    }
}
