package org.aksw.simba.topicmodeling.preprocessing.docsupplier.decorator.ner.filter;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.aksw.simba.topicmodeling.utils.doc.Document;
import org.aksw.simba.topicmodeling.utils.doc.DocumentText;
import org.aksw.simba.topicmodeling.utils.doc.ner.NamedEntityInText;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/simba/topicmodeling/preprocessing/docsupplier/decorator/ner/filter/StopwordBasedEntityFilter.class */
/**
 * A {@link NamedEntitiesFilter} that rejects named entities whose surface form
 * (the lower-cased substring of the document text covered by the entity) is
 * contained in a stop word list, as well as entities shorter than
 * {@value #MINIMUM_LENGTH_OF_NE} characters.
 *
 * <p>
 * The default constructor loads the list from the classpath resource
 * {@code englishStopwordlist.txt}. If the list cannot be loaded, an error is
 * logged and the filter works with an empty stop word list, i.e., it only
 * applies the length and bounds checks.
 * </p>
 */
public class StopwordBasedEntityFilter implements NamedEntitiesFilter {
    private static final Logger LOGGER = LoggerFactory.getLogger(StopwordBasedEntityFilter.class);
    private static final int MINIMUM_LENGTH_OF_NE = 2;
    /** Name of the classpath resource containing one stop word per line. */
    private static final String STOPWORD_RESOURCE = "englishStopwordlist.txt";

    /**
     * The stop words used for filtering. Starts as an empty set so that a failed
     * load never leaves the filter with a {@code null} list.
     */
    private Set<String> stopwordlist = new HashSet<String>();

    /**
     * Creates the filter and loads the stop word list from the classpath.
     */
    public StopwordBasedEntityFilter() {
        URL resource = getClass().getClassLoader().getResource(STOPWORD_RESOURCE);
        if (resource == null) {
            LOGGER.error("Couldn't find the stop word list resource \"" + STOPWORD_RESOURCE
                    + "\" on the classpath. This StopwordBasedEntityFilter won't work as expected!");
            return;
        }
        // FileUtils.toFile decodes URL escapes (e.g., %20) and returns null for
        // non-"file" URLs, e.g., when the resource is located inside a JAR.
        File file = FileUtils.toFile(resource);
        if (file != null) {
            createWordList(file);
        } else {
            createWordList(resource);
        }
    }

    /**
     * Reads the stop word list from the given file (one entry per line, read as
     * UTF-8). If reading fails, the error is logged and the current list is left
     * unchanged.
     *
     * @param file
     *            the file containing the stop words
     */
    protected void createWordList(File file) {
        try {
            this.stopwordlist = new HashSet<String>(FileUtils.readLines(file, StandardCharsets.UTF_8.name()));
        } catch (Exception e) {
            // Deliberately broad: a broken word list must not prevent the
            // creation of the filter.
            LOGGER.error("Couldn't read word list from file. This StopwordBasedEntityFilter won't work as expected!",
                    e);
        }
    }

    /**
     * Reads the stop word list from a URL that cannot be expressed as a local
     * file (one entry per line, read as UTF-8). If reading fails, the error is
     * logged and the current list is left unchanged.
     *
     * @param resource
     *            the URL of the stop word list
     */
    private void createWordList(URL resource) {
        Set<String> words = new HashSet<String>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                words.add(line);
            }
            this.stopwordlist = words;
        } catch (IOException e) {
            LOGGER.error("Couldn't read word list from " + resource
                    + ". This StopwordBasedEntityFilter won't work as expected!", e);
        }
    }

    /**
     * Checks whether the given named entity should be kept.
     *
     * @param document
     *            the document containing the named entity
     * @param namedEntityInText
     *            the named entity that should be checked
     * @return {@code false} if the entity is shorter than
     *         {@value #MINIMUM_LENGTH_OF_NE} characters, if the document has no
     *         {@link DocumentText} property (or no text), if the entity's
     *         positions do not lie inside the text or if the lower-cased surface
     *         form of the entity is part of the stop word list; {@code true}
     *         otherwise
     */
    @Override
    public boolean isNamedEntityGood(Document document, NamedEntityInText namedEntityInText) {
        if (namedEntityInText.getLength() < MINIMUM_LENGTH_OF_NE) {
            return false;
        }
        DocumentText documentText = document.getProperty(DocumentText.class);
        if (documentText == null) {
            return false;
        }
        String text = documentText.getText();
        int start = namedEntityInText.getStartPos();
        int end = namedEntityInText.getEndPos();
        // An entity pointing outside of the text can not be checked and is
        // treated like an entity of a document without text.
        if ((text == null) || (start < 0) || (start > end) || (end > text.length())) {
            return false;
        }
        // Locale.ROOT keeps the lower-casing independent of the default locale
        // of the JVM (e.g., "I" must become "i" even with a Turkish locale).
        String surfaceForm = text.substring(start, end).toLowerCase(Locale.ROOT);
        return !this.stopwordlist.contains(surfaceForm);
    }
}
