package org.nlp2rdf.plugin;

import com.hp.hpl.jena.ontology.DatatypeProperty;
import com.hp.hpl.jena.ontology.Individual;
import com.hp.hpl.jena.ontology.OntClass;
import com.hp.hpl.jena.ontology.OntModel;
import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.EnglishLemmatizer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.log4j.Logger;
import org.nlp2rdf.datastructure.BackboneVocabulary;
import org.nlp2rdf.datastructure.DocumentDTO;
import org.nlp2rdf.datastructure.SentenceDTO;
import org.nlp2rdf.datastructure.TokenDTO;
import org.nlp2rdf.plugin.interfaces.DependentAddPlugin;
import org.nlp2rdf.util.string.Progress;

/* loaded from: input_file:org/nlp2rdf/plugin/MorphadornerLemmatizer.class */
/**
 * Plugin that attaches a lemma to every token individual of a document, using
 * the {@link EnglishLemmatizer}.
 *
 * <p>For each token the word class is derived from the ontology classes of the
 * token's individual (see {@link #determineType(OntModel, String)}), the label is
 * lemmatized for that word class, and the result is stored on the individual via
 * the property named by {@code BackboneVocabulary.hasLemmaProperty}. When no lemma
 * can be produced, the token's label is stored instead.
 */
public class MorphadornerLemmatizer extends DependentAddPlugin {
    private static final Logger logger = Logger.getLogger(MorphadornerLemmatizer.class);

    /** Word class reported when no ontology class gives a better hint. */
    private static final String DEFAULT_TYPE = "noun";

    /** The lemmatizer, or {@code null} when it could not be initialised. */
    private final EnglishLemmatizer englishLemmatizer;

    /**
     * Creates the plugin and tries to initialise the lemmatizer.
     *
     * <p>A failure is logged rather than propagated; the plugin then stores each
     * token's label as its lemma.
     */
    public MorphadornerLemmatizer() {
        EnglishLemmatizer lemmatizer = null;
        try {
            lemmatizer = new EnglishLemmatizer();
        } catch (Exception e) {
            // Report through the class logger (with the cause) instead of stderr.
            logger.error("could not initialise EnglishLemmatizer, spellings will be used as lemmas", e);
        }
        this.englishLemmatizer = lemmatizer;
    }

    /**
     * Computes a lemma for every token of every sentence and adds it to the
     * corresponding individual in the document's model.
     *
     * <p>Tokens whose URI has no individual in the model are skipped with a warning.
     *
     * @param documentDTO the document whose model is enriched in place
     * @return always {@code null}
     */
    @Override // org.nlp2rdf.plugin.interfaces.AbstarctPlugin
    protected DocumentDTO _process(DocumentDTO documentDTO) {
        OntModel model = documentDTO.getModel();
        DatatypeProperty lemmaProperty = model.getDatatypeProperty(BackboneVocabulary.hasLemmaProperty);

        // Lemmas are collected first and written afterwards, so the model is not
        // modified while its classes are still being queried.
        Map<String, String> lemmaByUri = new HashMap<String, String>();

        int sentenceCount = documentDTO.getSentences().size();
        int sentenceIndex = 0;
        for (SentenceDTO sentenceDTO : documentDTO.getSentences()) {
            Progress.toLog("lemmatizing Sentence: ", sentenceIndex, sentenceCount, 5);
            sentenceIndex++;

            List<TokenDTO> tokens = sentenceDTO.getTokenDTOs();
            for (int position = 0; position < tokens.size(); position++) {
                TokenDTO tokenDTO = tokens.get(position);
                Individual individual = model.getIndividual(tokenDTO.getUri());
                if (individual == null) {
                    // Nothing to attach a lemma to.
                    logger.warn("no individual found for token " + tokenDTO.getUri() + ", skipping");
                    continue;
                }
                String uri = individual.getURI();
                lemmaByUri.put(uri, lemmaFor(model, uri, tokenDTO.getLabel(), position == 0));
            }
        }

        for (Map.Entry<String, String> entry : lemmaByUri.entrySet()) {
            Individual individual = model.getIndividual(entry.getKey());
            if (individual != null) {
                individual.addProperty(lemmaProperty, entry.getValue());
            }
        }
        // NOTE(review): null is what this method has always returned; the caller's
        // contract is not visible here, so it is kept as is. Confirm whether the
        // processed document should be returned instead.
        return null;
    }

    /**
     * Lemmatizes one token label, falling back to the label itself on any failure.
     *
     * @param model the model holding the token's individual
     * @param uri the URI of the token's individual
     * @param label the token's surface form
     * @param sentenceInitial whether the token is the first one of its sentence;
     *     the lemma of such a token is lower-cased
     * @return the lemma, or {@code label} when no lemma could be produced
     */
    private String lemmaFor(OntModel model, String uri, String label, boolean sentenceInitial) {
        if (this.englishLemmatizer == null) {
            logger.warn("no lemma found for " + label + " using spelling directly");
            return label;
        }
        try {
            String type = determineType(model, uri);
            String lemma = this.englishLemmatizer.lemmatize(label, type);
            if (sentenceInitial) {
                // Locale.ROOT keeps the result independent of the platform locale.
                lemma = lemma.toLowerCase(Locale.ROOT);
            }
            logger.trace(type + " | " + label + " -> " + lemma);
            return lemma;
        } catch (Exception e) {
            logger.warn("no lemma found for " + label + " using spelling directly");
            logger.warn(e.toString());
            return label;
        }
    }

    /**
     * Derives a coarse word class from the ontology classes of an individual.
     *
     * <p>The classes are inspected in the order the model lists them; the first
     * class whose lower-cased URI contains {@code "verb"}, {@code "determiner"} or
     * {@code "noun"} (checked in that order) decides the result. Classes without a
     * URI are ignored.
     *
     * @param ontModel the model to look the individual up in
     * @param str the URI of the individual
     * @return {@code "verb"}, {@code "determiner"} or {@code "noun"}; {@code "noun"}
     *     when no class matches or the individual is not in the model
     */
    protected String determineType(OntModel ontModel, String str) {
        Individual individual = ontModel.getIndividual(str);
        if (individual == null) {
            logger.debug("found no individual for " + str);
            return DEFAULT_TYPE;
        }
        List<?> classes = individual.listOntClasses(false).toList();
        logger.debug("found " + classes.size() + " classes for " + str);
        for (Object candidate : classes) {
            String classUri = ((OntClass) candidate).getURI();
            if (classUri == null) {
                // A class without a URI carries no word-class hint.
                continue;
            }
            // Locale.ROOT so the substring checks do not depend on the platform locale.
            String lowered = classUri.toLowerCase(Locale.ROOT);
            if (lowered.contains("verb")) {
                return "verb";
            }
            if (lowered.contains("determiner")) {
                return "determiner";
            }
            if (lowered.contains("noun")) {
                return "noun";
            }
        }
        return DEFAULT_TYPE;
    }
}
