package org.aksw.simba.topicmodeling.preprocessing;

import org.aksw.simba.topicmodeling.preprocessing.docsupplier.DocumentSupplier;
import org.aksw.simba.topicmodeling.utils.corpus.Corpus;
import org.aksw.simba.topicmodeling.utils.doc.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/simba/topicmodeling/preprocessing/AbstractScaleablePreprocessor.class */
/**
 * Base class for preprocessors that can build a corpus limited to a maximum
 * number of documents.
 *
 * <p>Documents are pulled one at a time from the supplier returned by
 * {@code getSupplier()} and handed to {@code addDocumentToCorpus(...)}; both
 * are provided by the superclass hierarchy and are not visible in this file.
 */
public abstract class AbstractScaleablePreprocessor extends AbstractPreprocessor implements ScaleablePreprocessor {
    private static final Logger LOGGER = LoggerFactory.getLogger(AbstractScaleablePreprocessor.class);

    /** A progress message is logged each time the corpus size is a multiple of this value. */
    private static final int PROGRESS_LOG_INTERVAL = 1000;

    /**
     * Creates a preprocessor reading from the given supplier.
     *
     * @param documentSupplier source of the documents, passed on to the superclass
     */
    public AbstractScaleablePreprocessor(DocumentSupplier documentSupplier) {
        super(documentSupplier);
    }

    /**
     * Creates a preprocessor reading from the given supplier and using the given corpus.
     *
     * @param documentSupplier source of the documents, passed on to the superclass
     * @param corpus corpus instance passed on to the superclass
     */
    public AbstractScaleablePreprocessor(DocumentSupplier documentSupplier, Corpus corpus) {
        super(documentSupplier, corpus);
    }

    /**
     * Returns the corpus, generating it first if that has not happened yet.
     *
     * <p>The limit only takes effect on the call that actually generates the
     * corpus. Once the corpus has been marked as created, the existing corpus
     * is returned unchanged, whatever value is passed.
     *
     * @param i upper bound for the number of documents in the corpus
     * @return the corpus held by this preprocessor
     */
    @Override // org.aksw.simba.topicmodeling.preprocessing.ScaleablePreprocessor
    public Corpus getCorpus(int i) {
        if (!this.corpusCreated) {
            generateCorpus(i);
        }
        return this.corpus;
    }

    /**
     * Fills the corpus with documents from the supplier until the corpus holds
     * {@code i} documents or the supplier returns {@code null}, then marks the
     * corpus as created.
     *
     * @param i upper bound for the number of documents in the corpus
     */
    private void generateCorpus(int i) {
        if (this.corpus == null) {
            this.corpus = getNewCorpus();
        }
        DocumentSupplier supplier = getSupplier();
        // Test the limit BEFORE asking the supplier for another document, so no
        // document is fetched only to be thrown away once the limit is reached.
        while (this.corpus.getNumberOfDocuments() < i) {
            Document document = supplier.getNextDocument();
            if (document == null) {
                // The supplier has no further documents.
                break;
            }
            addDocumentToCorpus(this.corpus, document);
            if (this.corpus.getNumberOfDocuments() % PROGRESS_LOG_INTERVAL == 0) {
                LOGGER.info("Corpus has {} documents.", this.corpus.getNumberOfDocuments());
            }
        }
        this.corpusCreated = true;
        LOGGER.info("Corpus has {} documents.", this.corpus.getNumberOfDocuments());
    }
}
