package org.dice_research.topicmodeling.preprocessing.docsupplier.decorator;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntOpenHashMap;
import java.util.Arrays;
import org.dice_research.topicmodeling.preprocessing.docsupplier.DocumentSupplier;
import org.dice_research.topicmodeling.utils.doc.Document;
import org.dice_research.topicmodeling.utils.doc.DocumentTextWordIds;
import org.dice_research.topicmodeling.utils.doc.DocumentWordCounts;
import org.dice_research.topicmodeling.utils.vocabulary.SimpleVocabularyBuilder;
import org.dice_research.topicmodeling.utils.vocabulary.Vocabulary;

/* loaded from: input_file:org/dice_research/topicmodeling/preprocessing/docsupplier/decorator/VocabularyReductionMappingApplyingSupplierDecorator.class */
/**
 * Document supplier decorator that rewrites the word ids stored in a document according to a
 * vocabulary reduction mapping.
 *
 * <p>The mapping is an array indexed by the old word id. Each entry holds either the new id of
 * that word or {@link #REMOVED_WORD} if the word is dropped. Mappings produced by
 * {@link #createMapping(Vocabulary, BitSet)} only ever move a word to a <em>smaller</em> id
 * that belonged to a removed word; the in-place update in
 * {@link #updateWordCounts(DocumentWordCounts)} relies on that property.
 */
public class VocabularyReductionMappingApplyingSupplierDecorator extends AbstractDocumentSupplierDecorator {
    /** Mapping value marking an old word id that has no new id, i.e. the word is removed. */
    public static final int REMOVED_WORD = -1;

    /** Old word id -> new word id (or {@link #REMOVED_WORD}). */
    private final int[] mapping;

    /**
     * Builds a compacting mapping that keeps exactly the words whose bit is set.
     *
     * <p>The ids are scanned from the front. Every id whose bit is not set is a "hole"; it is
     * filled with the highest kept id above it that has not been moved yet. Kept ids that are
     * never moved map to themselves, and ids whose bit is not set stay {@link #REMOVED_WORD}.
     *
     * @param vocabulary the vocabulary whose size determines the length of the mapping
     * @param bitSet     bit {@code i} set means the word with id {@code i} is kept
     * @return an array of length {@code vocabulary.size()} mapping old ids to new ids
     */
    public static int[] createMapping(Vocabulary vocabulary, BitSet bitSet) {
        int[] iArr = new int[vocabulary.size()];
        Arrays.fill(iArr, REMOVED_WORD);
        // Highest kept id that has not yet been relocated into a hole.
        int lastSetBit = lastSetBit(bitSet, iArr.length);
        for (int i = 0; i < iArr.length; i++) {
            if (bitSet.get(i)) {
                // A kept word stays where it is unless it was already moved into a lower hole.
                if (iArr[i] == REMOVED_WORD) {
                    iArr[i] = i;
                }
            } else if (lastSetBit > i) {
                // Fill this hole with the highest remaining kept word, then step to the next one.
                iArr[lastSetBit] = i;
                lastSetBit = lastSetBit(bitSet, lastSetBit);
            }
        }
        return iArr;
    }

    /**
     * Applies the given mapping to a copy-builder of the vocabulary.
     *
     * <p>Entries equal to {@link #REMOVED_WORD} are removed from the builder; entries that differ
     * from their own index cause the word at the old id to be set at the new id. Entries mapping
     * to themselves are left alone.
     *
     * @param vocabulary the vocabulary handed to the {@link SimpleVocabularyBuilder}
     * @param iArr       mapping from old word id to new word id or {@link #REMOVED_WORD}
     * @return the vocabulary returned by the builder after all updates
     */
    public static Vocabulary updateVocabulary(Vocabulary vocabulary, int[] iArr) {
        SimpleVocabularyBuilder simpleVocabularyBuilder = new SimpleVocabularyBuilder(vocabulary);
        // NOTE(review): processing in ascending id order means a target id has already been
        // removed before a word is moved onto it (for createMapping-style mappings) - confirm
        // that SimpleVocabularyBuilder depends on this order before changing it.
        for (int oldId = 0; oldId < iArr.length; oldId++) {
            int newId = iArr[oldId];
            if (newId == REMOVED_WORD) {
                simpleVocabularyBuilder.remove(oldId);
            } else if (newId != oldId) {
                simpleVocabularyBuilder.setWord(simpleVocabularyBuilder.getWord(oldId), newId);
            }
        }
        return simpleVocabularyBuilder.getVocabulary();
    }

    /**
     * Finds the highest set bit strictly below the given index.
     *
     * @param bitSet the bit set to search
     * @param i      exclusive upper bound of the search
     * @return the index of the highest set bit smaller than {@code i}; a negative value
     *         ({@code -1} when {@code i >= 0}) if there is none
     */
    protected static int lastSetBit(BitSet bitSet, int i) {
        int index = i - 1;
        while (index >= 0 && !bitSet.get(index)) {
            index--;
        }
        return index;
    }

    /**
     * Creates a decorator applying the given mapping to every document of the decorated supplier.
     *
     * @param documentSupplier the supplier to decorate
     * @param iArr             mapping from old word id to new word id or {@link #REMOVED_WORD};
     *                         the array is stored as given, not copied
     */
    public VocabularyReductionMappingApplyingSupplierDecorator(DocumentSupplier documentSupplier, int[] iArr) {
        super(documentSupplier);
        this.mapping = iArr;
    }

    /**
     * Rewrites the {@link DocumentTextWordIds} and {@link DocumentWordCounts} properties of the
     * document, if present, and re-adds the results to the document.
     *
     * @param document the document to update
     * @return the same document instance
     */
    @Override
    protected Document prepareDocument(Document document) {
        DocumentTextWordIds documentTextWordIds = (DocumentTextWordIds) document.getProperty(DocumentTextWordIds.class);
        if (documentTextWordIds != null) {
            document.addProperty(updateWordIds(documentTextWordIds));
        }
        DocumentWordCounts documentWordCounts = (DocumentWordCounts) document.getProperty(DocumentWordCounts.class);
        if (documentWordCounts != null) {
            document.addProperty(updateWordCounts(documentWordCounts));
        }
        return document;
    }

    /**
     * Creates a new word id sequence in which every id is replaced by its mapped id and removed
     * words are dropped. The relative order of the remaining words is preserved.
     *
     * @param documentTextWordIds the word id sequence using old ids
     * @return a new {@link DocumentTextWordIds} using the new ids
     */
    protected DocumentTextWordIds updateWordIds(DocumentTextWordIds documentTextWordIds) {
        int[] wordIds = documentTextWordIds.getWordIds();
        // At most as many ids as before can survive, so the original length is a safe capacity.
        IntArrayList intArrayList = new IntArrayList(wordIds.length);
        for (int oldId : wordIds) {
            int newId = this.mapping[oldId];
            if (newId != REMOVED_WORD) {
                intArrayList.add(newId);
            }
        }
        return new DocumentTextWordIds(intArrayList.toArray());
    }

    /**
     * Re-keys the word counts from old ids to new ids, working on the map obtained from
     * {@code getWordCounts()}.
     *
     * <p>Counts of removed words are deleted. The count of a moved word is transferred to its new
     * id only if the document actually contains that word; otherwise nothing is written, so no
     * zero-count entries are introduced for words that are absent from the document.
     *
     * <p>The update is done in ascending id order and assumes that a moved word's target id is a
     * smaller id of a removed word (as produced by {@link #createMapping(Vocabulary, BitSet)}),
     * so the target entry has already been cleared when the move happens.
     *
     * @param documentWordCounts the word counts using old ids
     * @return the same {@link DocumentWordCounts} instance that was passed in
     */
    protected DocumentWordCounts updateWordCounts(DocumentWordCounts documentWordCounts) {
        IntIntOpenHashMap wordCounts = documentWordCounts.getWordCounts();
        for (int oldId = 0; oldId < this.mapping.length; oldId++) {
            int newId = this.mapping[oldId];
            if (newId == REMOVED_WORD) {
                wordCounts.remove(oldId);
            } else if (newId != oldId && wordCounts.containsKey(oldId)) {
                // Guarded by containsKey: remove() on an absent key yields the map's default
                // value, which would otherwise be stored as a bogus count under newId.
                wordCounts.put(newId, wordCounts.remove(oldId));
            }
        }
        return documentWordCounts;
    }
}
