package org.dice_research.topicmodeling.preprocessing.corpus;

import com.carrotsearch.hppc.BitSet;
import java.util.List;
import java.util.Spliterators;
import java.util.concurrent.atomic.AtomicIntegerArray;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.dice_research.topicmodeling.preprocessing.consume.DocumentFrequencyDeterminer;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.VocabularyReductionMappingApplyingSupplierDecorator;
import org.dice_research.topicmodeling.utils.corpus.Corpus;
import org.dice_research.topicmodeling.utils.corpus.DocumentListCorpus;
import org.dice_research.topicmodeling.utils.corpus.properties.CorpusVocabulary;
import org.dice_research.topicmodeling.utils.vocabulary.Vocabulary;

/* loaded from: input_file:org/dice_research/topicmodeling/preprocessing/corpus/DocumentFrequencyBasedCorpusPreprocessor.class */
/**
 * {@link CorpusPreprocessor} that filters the vocabulary of a corpus by document frequency.
 *
 * <p>For every entry of the count array delivered by a {@link DocumentFrequencyDeterminer},
 * the count is compared against an inclusive {@code [minDF, maxDF]} range. The resulting
 * selection is turned into a word id mapping which is then applied to the vocabulary and to
 * every document of the corpus.
 */
public class DocumentFrequencyBasedCorpusPreprocessor implements CorpusPreprocessor {

    /** Inclusive lower bound of the accepted document frequency. */
    private final int minDF;

    /** Inclusive upper bound of the accepted document frequency. */
    private final int maxDF;

    /**
     * Creates a preprocessor with the given document frequency bounds.
     *
     * @param i  minimum document frequency (inclusive)
     * @param i2 maximum document frequency (inclusive)
     */
    public DocumentFrequencyBasedCorpusPreprocessor(int i, int i2) {
        this.minDF = i;
        this.maxDF = i2;
    }

    /**
     * Builds a new corpus from the given one using a document-frequency based word id mapping.
     *
     * <p>The corpus is iterated twice, both times as a parallel stream: first to feed every
     * document to a {@link DocumentFrequencyDeterminer}, then to map every document with a
     * {@link VocabularyReductionMappingApplyingSupplierDecorator}. The streams are created
     * without the {@code ORDERED} characteristic.
     *
     * <p>NOTE(review): the determiner is used as a consumer of a parallel stream, so it is
     * presumably safe for concurrent use - confirm against its implementation.
     *
     * @param corpus the corpus to process; its {@link CorpusVocabulary} property is
     *               dereferenced directly, so it is expected to be present
     * @return a {@link DocumentListCorpus} holding the mapped documents and carrying the
     *         updated vocabulary as its {@link CorpusVocabulary} property
     */
    @Override
    public Corpus preprocess(Corpus corpus) {
        Vocabulary sourceVocabulary = (Vocabulary) corpus.getProperty(CorpusVocabulary.class).get();

        // First pass: let the determiner see every document.
        DocumentFrequencyDeterminer frequencyCounter = new DocumentFrequencyDeterminer(sourceVocabulary);
        StreamSupport
                .stream(Spliterators.spliterator(corpus.iterator(), corpus.getNumberOfDocuments(), 0), true)
                .forEach(frequencyCounter);

        // Derive the id mapping from the counts and apply it to the vocabulary.
        int[] wordIdMapping = createWordIdMapping(sourceVocabulary, frequencyCounter.getCounts(), this.minDF,
                this.maxDF);
        Vocabulary reducedVocabulary = VocabularyReductionMappingApplyingSupplierDecorator
                .updateVocabulary(sourceVocabulary, wordIdMapping);

        // Second pass: apply the same mapping to every document.
        List mappedDocuments = (List) StreamSupport
                .stream(Spliterators.spliterator(corpus.iterator(), corpus.getNumberOfDocuments(), 0), true)
                .map(new VocabularyReductionMappingApplyingSupplierDecorator(null, wordIdMapping))
                .collect(Collectors.toList());

        DocumentListCorpus reducedCorpus = new DocumentListCorpus(mappedDocuments);
        reducedCorpus.addProperty(new CorpusVocabulary(reducedVocabulary));
        return reducedCorpus;
    }

    /**
     * Creates a word id mapping from the given counts.
     *
     * <p>Every index of {@code atomicIntegerArray} whose value lies within {@code [i, i2]}
     * (both bounds inclusive) is flagged in a bit set. The bit set is handed, together with
     * the vocabulary, to
     * {@link VocabularyReductionMappingApplyingSupplierDecorator#createMapping}, whose result
     * is returned unchanged.
     *
     * <p>NOTE(review): the array indices are presumably word ids and flagged entries are
     * presumably the words that survive the reduction - confirm against the determiner and
     * {@code createMapping}.
     *
     * @param vocabulary         the vocabulary passed on to {@code createMapping}
     * @param atomicIntegerArray the counts to test against the bounds
     * @param i                  minimum accepted count (inclusive)
     * @param i2                 maximum accepted count (inclusive)
     * @return the mapping produced by {@code createMapping}
     */
    public static int[] createWordIdMapping(Vocabulary vocabulary, AtomicIntegerArray atomicIntegerArray, int i,
            int i2) {
        final int entryCount = atomicIntegerArray.length();
        BitSet withinBounds = new BitSet(entryCount);
        for (int index = 0; index < entryCount; ++index) {
            int frequency = atomicIntegerArray.get(index);
            if (frequency < i || frequency > i2) {
                // Outside the accepted range: leave the bit cleared.
                continue;
            }
            withinBounds.set(index);
        }
        return VocabularyReductionMappingApplyingSupplierDecorator.createMapping(vocabulary, withinBounds);
    }
}
