package org.aksw.simba.topicmodeling.preprocessing;

import java.io.File;
import org.aksw.simba.topicmodeling.io.stream.DocumentSupplierSerializer;
import org.aksw.simba.topicmodeling.preprocessing.docsupplier.DocumentSupplier;
import org.aksw.simba.topicmodeling.utils.corpus.Corpus;
import org.aksw.simba.topicmodeling.utils.corpus.StreamedFileBasedCorpus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/simba/topicmodeling/preprocessing/StreamedFileBasedCorpusCreatingPreprocessor.class */
/**
 * {@link Preprocessor} that lazily builds a {@link StreamedFileBasedCorpus}
 * by serializing the documents of a {@link DocumentSupplier} into a file.
 *
 * <p>The corpus is generated on the first call to {@link #getCorpus()} and is
 * then reused until {@link #deleteCorpus()} is called. This class performs no
 * synchronization of its own.
 */
public class StreamedFileBasedCorpusCreatingPreprocessor implements Preprocessor {
    private static final Logger LOGGER = LoggerFactory.getLogger(StreamedFileBasedCorpusCreatingPreprocessor.class);

    /** Source of the documents that are serialized into {@link #corpusFile}. */
    private final DocumentSupplier supplier;

    /** The generated corpus, or {@code null} if none has been generated (or it has been deleted). */
    protected StreamedFileBasedCorpus corpus = null;

    /** {@code true} once {@link #generateCorpus()} has completed and until {@link #deleteCorpus()} is called. */
    protected boolean corpusCreated = false;

    /** File the documents are serialized to and which backs the corpus. */
    protected File corpusFile;

    /**
     * Creates a preprocessor that will serialize the given supplier's documents
     * into the given file. Nothing is read or written until the corpus is
     * requested.
     *
     * @param documentSupplier supplier of the documents that make up the corpus
     * @param file             file the serialized documents are written to
     */
    public StreamedFileBasedCorpusCreatingPreprocessor(DocumentSupplier documentSupplier, File file) {
        this.supplier = documentSupplier;
        this.corpusFile = file;
    }

    /**
     * Not supported by this implementation.
     *
     * @param documentSupplier ignored
     * @throws UnsupportedOperationException always
     * @deprecated this method is not implemented; pass the supplier to the
     *             constructor instead.
     */
    @Deprecated
    public void addDocuments(DocumentSupplier documentSupplier) {
        throw new UnsupportedOperationException("This method is not implemented!");
    }

    /**
     * Returns the corpus, generating it first if it has not been created yet
     * (or has been deleted since).
     *
     * @return the file based corpus
     */
    public Corpus getCorpus() {
        if (!this.corpusCreated) {
            generateCorpus();
        }
        return this.corpus;
    }

    /**
     * Serializes the supplier's documents into {@link #corpusFile}, wraps the
     * file in a new {@link StreamedFileBasedCorpus} and marks the corpus as
     * created.
     */
    protected void generateCorpus() {
        // The serializer's result is handed straight to the corpus constructor;
        // presumably it is the number of serialized documents -- TODO confirm.
        this.corpus = new StreamedFileBasedCorpus(this.corpusFile,
                new DocumentSupplierSerializer().serialize(this.supplier, this.corpusFile));
        this.corpusCreated = true;
        // Parameterized logging avoids manual string concatenation.
        LOGGER.info("Corpus has {} documents.", this.corpus.getNumberOfDocuments());
    }

    /**
     * Tells whether a corpus has been generated and not deleted since.
     *
     * @return {@code true} if a corpus is currently available
     */
    public boolean hasCorpus() {
        return this.corpusCreated;
    }

    /**
     * Clears and releases the current corpus, if any, and resets the created
     * flag so that the next {@link #getCorpus()} call generates the corpus
     * again.
     */
    public void deleteCorpus() {
        if (this.corpus != null) {
            // NOTE(review): whether clear() also removes the backing file is
            // not visible from this class -- confirm in StreamedFileBasedCorpus.
            this.corpus.clear();
            this.corpus = null;
        }
        this.corpusCreated = false;
    }
}
