package org.nlp2rdf.plugin;

import com.hp.hpl.jena.ontology.Individual;
import com.hp.hpl.jena.ontology.ObjectProperty;
import com.hp.hpl.jena.ontology.OntClass;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.vocabulary.OWL;
import com.jamonapi.Monitor;
import com.jamonapi.MonitorFactory;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.nlp2rdf.datastructure.BackboneVocabulary;
import org.nlp2rdf.datastructure.DocumentDTO;
import org.nlp2rdf.datastructure.OliaVocabulary;
import org.nlp2rdf.plugin.interfaces.DependentAddPlugin;
import org.nlp2rdf.plugin.interfaces.IDependentAddPlugin;
import org.nlp2rdf.util.TokenHelper;
import org.nlp2rdf.util.string.Progress;
import org.nlp2rdf.util.string.Time;
import org.nlp2rdf.util.string.URLencode;

/* loaded from: input_file:org/nlp2rdf/plugin/UriStandardizer.class */
/**
 * Plugin that attaches a standardized ("normalized") URI to word and phrase tokens of a document model.
 *
 * <p>For every individual of the backbone token class, a URI is built from the configured
 * {@code prefix} and the lower-cased, trimmed, URL-encoded lemma(s) of the token. The resulting
 * individual is linked to the token through the object property named by {@code normUriString}.
 *
 * <p>{@code prefix}, {@code normUriString} and {@code filterLemmas} are supplied through the setters;
 * this class does not validate them.
 */
public class UriStandardizer extends DependentAddPlugin implements IDependentAddPlugin {
    private static final Logger logger = Logger.getLogger(UriStandardizer.class);

    /** String prepended to every standardized lemma to form the standardized URI. */
    private String prefix;

    /** URI of the object property that links a token to its standardized individual. */
    private String normUriString;

    /** Lower-cased lemmas that must not contribute to a standardized URI; may be unset (null). */
    private Set<String> filterLemmas;

    // NOTE(review): not read anywhere in this class; kept because other code may rely on the field.
    private Set<String> filterClasses = new HashSet<String>(Arrays.asList(OliaVocabulary.olia_article));

    /**
     * Computes standardized URIs for all word and phrase tokens of the document's model and adds
     * them to that model. Tokens that are neither words nor phrases are skipped.
     *
     * @param documentDTO the document whose model is read and extended
     * @return always {@code null}
     */
    @Override // org.nlp2rdf.plugin.interfaces.AbstarctPlugin
    public DocumentDTO _process(DocumentDTO documentDTO) {
        // token URI -> standardized URI; model writes are deferred until every token has been visited
        Map<String, String> standardizedUris = new HashMap<String, String>();
        OntModel model = documentDTO.getModel();
        OntClass tokenClass = model.getOntClass(BackboneVocabulary.tokenClass);
        Property lemmaProperty = model.getProperty(BackboneVocabulary.hasLemmaProperty);
        Set<Individual> tokens = model.listIndividuals(tokenClass).toSet();

        int processed = 0;
        for (Individual token : tokens) {
            Monitor wordMonitor = MonitorFactory.getTimeMonitor("words");
            Monitor phraseMonitor = MonitorFactory.getTimeMonitor("phrase");
            Progress.toLog("Processing token\nwords: " + Time.neededMs(wordMonitor.getTotal()) + "\nphrases: " + Time.neededMs(phraseMonitor.getTotal()), processed, tokens.size(), 50);
            processed++;

            if (TokenHelper.isWord(token)) {
                wordMonitor.start();
                try {
                    standardizeWord(lemmaProperty, token, standardizedUris);
                } finally {
                    // stop the timer even if standardization throws
                    wordMonitor.stop();
                }
            } else if (TokenHelper.isPhrase(token)) {
                phraseMonitor.start();
                try {
                    standardizePhrase(lemmaProperty, token, standardizedUris);
                } finally {
                    phraseMonitor.stop();
                }
            }
        }

        for (Map.Entry<String, String> entry : standardizedUris.entrySet()) {
            addStandardizedUri(model, entry.getKey(), entry.getValue());
        }
        // NOTE(review): returns null rather than documentDTO; left unchanged because callers are not
        // visible from this file - confirm whether the processed document should be returned.
        return null;
    }

    /**
     * Reads the lemma of a token.
     *
     * @param property   the property holding the lemma literal
     * @param individual the token to read the lemma from
     * @return the lexical form of the lemma exactly as stored (neither trimmed nor lower-cased), or
     *         {@code null} if the token has no usable lemma, the lemma is blank, or its lower-cased,
     *         trimmed form is contained in the configured filter set
     */
    public String getLemma(Property property, Individual individual) {
        // Explicit check instead of relying on a NullPointerException from getProperty(...)
        if (!individual.hasProperty(property)) {
            logger.warn("no lemma found for token " + individual);
            return null;
        }

        String lexicalForm;
        try {
            lexicalForm = individual.getProperty(property).getLiteral().getLexicalForm();
        } catch (Exception e) {
            // e.g. the property value is not a literal; the token is skipped, not fatal
            logger.warn("no lemma found for token " + individual, e);
            return null;
        }

        String normalized = lexicalForm.toLowerCase().trim();
        // filterLemmas is optional: an unset filter must not discard every lemma
        if (this.filterLemmas != null && this.filterLemmas.contains(normalized)) {
            logger.trace("filtered out: " + normalized);
            return null;
        }
        if (normalized.length() == 0) {
            logger.warn("no lemma found for token " + individual);
            return null;
        }
        return lexicalForm;
    }

    /**
     * Builds the standardized URI of a phrase from the lemmas of its word sub-tokens, joined by
     * {@code "_"} in the order given by {@code TokenHelper.orderWords}, and records it in {@code map}
     * under the phrase's URI. Nothing is recorded if no word yields a lemma.
     *
     * @param property   the property holding the lemma literal
     * @param individual the phrase token
     * @param map        receives the mapping phrase URI -> standardized URI
     */
    private void standardizePhrase(Property property, Individual individual, Map<String, String> map) {
        Set<Individual> subTokens = TokenHelper.listSubTokens(individual, BackboneVocabulary.subTokenTransProperty);
        Set<Individual> words = new HashSet<Individual>();
        for (Individual subToken : subTokens) {
            if (TokenHelper.isWord(subToken)) {
                words.add(subToken);
            }
        }

        List<Individual> orderedWords = TokenHelper.orderWords(words);
        filterPassiveAuxilliary(orderedWords);

        StringBuilder uri = new StringBuilder(String.valueOf(this.prefix));
        boolean hasLemma = false;
        for (Individual word : orderedWords) {
            String lemma = getLemma(property, word);
            if (lemma == null) {
                continue;
            }
            if (hasLemma) {
                uri.append('_');
            }
            uri.append(standardizeLemma(lemma));
            hasLemma = true;
        }

        if (hasLemma) {
            map.put(individual.getURI(), uri.toString());
        } else {
            logger.error("no lemma found for any word of phrase: " + individual);
        }
    }

    /**
     * Removes from {@code list} every word that another word in the list references through the
     * OLiA passive-auxiliary object property. The list is modified in place.
     *
     * @param list the ordered words of a phrase
     */
    private void filterPassiveAuxilliary(List<Individual> list) {
        for (int i = 0; i < list.size(); i++) {
            Individual word = list.get(i);
            ObjectProperty passiveAuxiliary = word.getOntModel().getObjectProperty(OliaVocabulary.olia_passive_auxilliary);
            // getObjectProperty may return null when the model does not declare the property;
            // a null predicate must not be handed to hasProperty.
            if (passiveAuxiliary == null || !word.hasProperty(passiveAuxiliary)) {
                continue;
            }

            String auxiliaryUri = word.getPropertyValue(passiveAuxiliary).as(Individual.class).getURI();
            if (auxiliaryUri == null) {
                // an auxiliary without a URI cannot be matched against the list
                continue;
            }

            for (int j = 0; j < list.size(); j++) {
                if (auxiliaryUri.equals(list.get(j).getURI())) {
                    logger.debug("removing " + auxiliaryUri);
                    list.remove(j);
                    // Elements after j shifted left by one: re-check slot j, and keep the outer
                    // index pointing at the same logical position.
                    if (j <= i) {
                        i--;
                    }
                    j--;
                }
            }
        }
    }

    /**
     * Records the standardized URI of a single word in {@code map} under the word's URI, if the word
     * has a usable lemma.
     *
     * @param property   the property holding the lemma literal
     * @param individual the word token
     * @param map        receives the mapping word URI -> standardized URI
     */
    private void standardizeWord(Property property, Individual individual, Map<String, String> map) {
        String lemma = getLemma(property, individual);
        if (lemma != null) {
            map.put(individual.getURI(), this.prefix + standardizeLemma(lemma));
        }
    }

    /**
     * Links the token identified by {@code str} to an {@code owl:Thing} individual with URI
     * {@code str2} via the object property named by {@code normUriString}.
     *
     * @param ontModel the model to extend
     * @param str      URI of the token
     * @param str2     standardized URI to attach
     */
    private void addStandardizedUri(OntModel ontModel, String str, String str2) {
        Individual token = ontModel.getIndividual(str);
        if (token == null) {
            // getIndividual may return null (e.g. the token had no URI); skip instead of failing the run
            logger.warn("cannot add standardized uri " + str2 + ", no individual found for " + str);
            return;
        }
        ObjectProperty normUriProperty = ontModel.createObjectProperty(this.normUriString);
        token.addProperty(normUriProperty, ontModel.createIndividual(str2, OWL.Thing));
    }

    /**
     * Normalizes a lemma for use in a URI: trims it, lower-cases it and passes it through
     * {@code URLencode.encode}.
     *
     * @param str the raw lemma
     * @return the encoded lemma
     */
    private String standardizeLemma(String str) {
        return URLencode.encode(str.trim().toLowerCase());
    }

    /** Sets the string prepended to every standardized lemma. */
    public void setPrefix(String str) {
        this.prefix = str;
    }

    /** Sets the URI of the object property linking a token to its standardized individual. */
    public void setNormUriString(String str) {
        this.normUriString = str;
    }

    /** Sets the lower-cased lemmas to exclude from standardized URIs. */
    public void setFilterLemmas(Set<String> set) {
        this.filterLemmas = set;
    }
}
