package org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.ner;

import java.util.Set;
import java.util.regex.Pattern;
import org.dice_research.topicmodeling.lang.Term;
import org.dice_research.topicmodeling.utils.doc.ner.NamedEntityInText;

/* loaded from: input_file:org/dice_research/topicmodeling/preprocessing/docsupplier/decorator/ner/EntityBasedTokenizer.class */
public class EntityBasedTokenizer extends AbstractNerPropagationPreprocessor implements EntityTokenSurfaceFormMappingSupplier {
    private static final String CHARS_TO_REPLACE = "[ \n\r\t\\.' \\(\\)\\[\\]\\{\\}]";
    private static final String CHARS_TO_INSERT = "";
    protected EntityTermMapping surfaceFormsMapping = new EntityTermMapping();

    @Override // org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.ner.AbstractNerPropagationPreprocessor
    protected String processEntity(NamedEntityInText namedEntityInText, String str, Set<String> set) {
        String replaceAll = str.replaceAll(CHARS_TO_REPLACE, CHARS_TO_INSERT);
        set.add(replaceAll);
        this.surfaceFormsMapping.entities.add(namedEntityInText);
        this.surfaceFormsMapping.terms.add(getTokensAfterPosTagging(namedEntityInText, str));
        return replaceAll;
    }

    protected Term[] getTokensAfterPosTagging(NamedEntityInText namedEntityInText, String str) {
        Term term = new Term(str);
        term.prop.setNoun(true);
        return new Term[]{term};
    }

    @Override // org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.ner.EntityTokenSurfaceFormMappingSupplier
    public EntityTermMapping getLastEntityTokenSurfaceFormMapping() {
        EntityTermMapping entityTermMapping = this.surfaceFormsMapping;
        this.surfaceFormsMapping = new EntityTermMapping();
        return entityTermMapping;
    }
}
