package org.sante.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.aksw.sante.core.Feature;
import org.aksw.sante.core.Namespacer;
import org.aksw.sante.core.ResultSetVisitor;
import org.aksw.sante.core.SerializationUtils;
import org.aksw.sante.core.URIPatternFilter;
import org.aksw.sante.entity.Entity;
import org.aksw.sante.entity.Literal;
import org.aksw.sante.entity.LiteralObject;
import org.aksw.sante.entity.Property;
import org.aksw.sante.entity.URIObject;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.sante.lucene.NLPUtils;

/* loaded from: input_file:org/sante/lucene/IndexBuilder.class */
/**
 * {@link ResultSetVisitor} that converts every visited {@link Entity} into a Lucene
 * {@link Document} and adds it to the configured {@link IndexWriter}.
 *
 * <p>For each entity the document contains:
 * <ul>
 *   <li>one analyzed text field per accepted property, named after the (optionally
 *       namespaced) property URI and holding the tokenized property/object labels;</li>
 *   <li>{@code RESOURCE} fields for every URI related to the entity;</li>
 *   <li>{@code TYPE} fields for the rdf:type objects plus the stored coarse type
 *       ({@code ENTITY}, {@code PROPERTY}, {@code CLASS} or {@code ONTOLOGY});</li>
 *   <li>the stored serialized entity ({@code DATA}) and its {@code URI};</li>
 *   <li>the doc-values fields {@code ENTITY_WEIGHT}, {@code LABEL_WEIGHT} and
 *       {@code ENTITY_RANK_FIELD}.</li>
 * </ul>
 */
public class IndexBuilder implements ResultSetVisitor<Entity> {
    private static final Logger logger = Logger.getLogger(IndexBuilder.class);

    protected static String RDF_TYPE_PROPERTY = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
    protected static String RDFS_SUBCLASSOF_PROPERTY = "http://www.w3.org/2000/01/rdf-schema#subClassOf";

    /** Coarse type labels stored in the {@code TYPE} field. */
    private static final String TYPE_ENTITY = "ENTITY";
    private static final String TYPE_PROPERTY = "PROPERTY";
    private static final String TYPE_CLASS = "CLASS";
    private static final String TYPE_ONTOLOGY = "ONTOLOGY";

    /** Destination of all generated documents. */
    protected IndexWriter writer;
    /** Analyzer handed to {@code NLPUtils.tokenize} when building the text fields. */
    protected Analyzer analyzer;
    /** Optional filter on the entity URI; when it rejects, the entity's properties are not traversed. */
    protected URIPatternFilter subjectFilter;
    /** Optional filter on property URIs; rejected properties are not indexed. */
    protected URIPatternFilter predicateFilter;
    /** Optional filter on rdf:type objects; an entity with a rejected type is not indexed at all. */
    protected URIPatternFilter typeFilter;
    /** Not read anywhere in this class. */
    protected int MAX_CLASS_INSTANCE;
    /** Stored by the constructor; not read anywhere in this class. */
    protected String endpoint;
    /** Stored by the constructor; not read anywhere in this class. */
    protected String graph;
    /** Not read anywhere in this class. */
    protected HashMap<String, Long> classIntances;
    /** Matches the URIs that mark an entity as a class. */
    protected URIPatternFilter classSelector;
    /** Matches the URIs that mark an entity as a property. */
    protected URIPatternFilter propertySelector;
    /** Matches the URIs that mark an entity as an ontology. */
    protected URIPatternFilter ontologySelector;
    /** Optional renamer applied to property URIs before they are used as field names. */
    protected Namespacer namespacer;

    /**
     * Creates a builder without subject, predicate or type filters and without a namespacer.
     *
     * @param indexWriter writer that receives the generated documents
     * @param analyzer    analyzer used when tokenizing feature labels
     * @param str         endpoint, stored in {@link #endpoint}
     * @param str2        graph, stored in {@link #graph}
     */
    public IndexBuilder(IndexWriter indexWriter, Analyzer analyzer, String str, String str2) {
        this.MAX_CLASS_INSTANCE = 1000000000;
        this.classIntances = new HashMap<>(100);
        this.classSelector = new URIPatternFilter();
        this.classSelector.setPatternIn(new String[]{"http://www.w3.org/2000/01/rdf-schema#Class", "http://www.w3.org/2002/07/owl#Class"});
        this.propertySelector = new URIPatternFilter();
        this.propertySelector.setPatternIn(new String[]{"http://www.w3.org/1999/02/22-rdf-syntax-ns#Property", "http://www.w3.org/2002/07/owl#ObjectProperty", "http://www.w3.org/2002/07/owl#AnnotationProperty", "http://www.w3.org/2002/07/owl#FunctionalProperty", "http://www.w3.org/2002/07/owl#DatatypeProperty"});
        this.ontologySelector = new URIPatternFilter();
        this.ontologySelector.setPatternIn(new String[]{"http://www.w3.org/2002/07/owl#Ontology"});
        this.writer = indexWriter;
        this.analyzer = analyzer;
        this.endpoint = str;
        this.graph = str2;
    }

    /**
     * Creates a builder that uses an {@link EnglishAnalyzer}.
     *
     * @param indexWriter writer that receives the generated documents
     * @param str         endpoint, stored in {@link #endpoint}
     * @param str2        graph, stored in {@link #graph}
     */
    public IndexBuilder(IndexWriter indexWriter, String str, String str2) {
        this(indexWriter, new EnglishAnalyzer(), str, str2);
    }

    /**
     * Creates a builder with the given filters; any filter may be {@code null} to disable it.
     *
     * @param indexWriter       writer that receives the generated documents
     * @param uRIPatternFilter  subject filter
     * @param uRIPatternFilter2 predicate filter
     * @param uRIPatternFilter3 type filter
     * @param analyzer          analyzer used when tokenizing feature labels
     * @param str               endpoint, stored in {@link #endpoint}
     * @param str2              graph, stored in {@link #graph}
     */
    public IndexBuilder(IndexWriter indexWriter, URIPatternFilter uRIPatternFilter, URIPatternFilter uRIPatternFilter2, URIPatternFilter uRIPatternFilter3, Analyzer analyzer, String str, String str2) {
        this(indexWriter, analyzer, str, str2);
        this.subjectFilter = uRIPatternFilter;
        this.predicateFilter = uRIPatternFilter2;
        this.typeFilter = uRIPatternFilter3;
    }

    /**
     * Creates a builder with the given filters and a namespacer for property URIs.
     *
     * @param indexWriter       writer that receives the generated documents
     * @param uRIPatternFilter  subject filter
     * @param uRIPatternFilter2 predicate filter
     * @param uRIPatternFilter3 type filter
     * @param namespacer        renamer applied to property URIs, or {@code null}
     * @param analyzer          analyzer used when tokenizing feature labels
     * @param str               endpoint, stored in {@link #endpoint}
     * @param str2              graph, stored in {@link #graph}
     */
    public IndexBuilder(IndexWriter indexWriter, URIPatternFilter uRIPatternFilter, URIPatternFilter uRIPatternFilter2, URIPatternFilter uRIPatternFilter3, Namespacer namespacer, Analyzer analyzer, String str, String str2) {
        this(indexWriter, uRIPatternFilter, uRIPatternFilter2, uRIPatternFilter3, analyzer, str, str2);
        this.namespacer = namespacer;
    }

    /**
     * Indexes a single entity.
     *
     * <p>The entity is skipped when the type filter rejects one of its rdf:type objects.
     * When the subject filter rejects the entity URI the properties are not traversed, but a
     * document carrying the entity URI, type, serialized data and weights is still written.
     * An {@link IOException} raised while building or writing the document is logged and
     * does not propagate.
     *
     * @param entity entity to index
     * @return always {@code true}
     */
    @Override
    public boolean visit(Entity entity) {
        logger.debug("Indexing " + entity.getURI());
        List<Feature> features = new ArrayList<>();
        HashSet<String> resources = new HashSet<>();
        resources.add(entity.getURI());
        HashSet<String> types = new HashSet<>();
        try {
            if (!passesTypeFilter(entity)) {
                return true;
            }
            Document document = new Document();
            String type = resolveType(entity);
            String uri = entity.getURI();
            if (this.subjectFilter == null || this.subjectFilter.evalute(uri)) {
                for (Property property : entity.getAllProperties()) {
                    if (this.predicateFilter != null && !this.predicateFilter.evalute(property.getURI())) {
                        continue;
                    }
                    String propertyUri = property.getURI();
                    if (this.namespacer != null) {
                        propertyUri = this.namespacer.rename(propertyUri);
                    }
                    resources.add(propertyUri);
                    resources.add(uri + "#" + propertyUri);

                    // Searchable text: property URI, property labels, then the object's
                    // value (literal) or labels and URI (resource), separated by blanks.
                    StringBuilder text = new StringBuilder().append(propertyUri);
                    for (Object label : property.getLabels()) {
                        text.append(' ').append(((Literal) label).getValue());
                    }
                    LiteralObject object = property.getObject();
                    if (object != null) {
                        String objectUri = object.getURI();
                        resources.add(propertyUri + "#" + objectUri);
                        resources.add(objectUri);
                        if (object.isLiteral()) {
                            text.append(' ').append(object.getValue());
                        } else {
                            // The un-renamed property URI decides whether this is a type statement.
                            if (property.getURI().equals(RDF_TYPE_PROPERTY)) {
                                types.add(objectUri);
                            }
                            for (Object label : ((URIObject) object).getLabels()) {
                                text.append(' ').append(((Literal) label).getValue());
                            }
                            text.append(' ').append(objectUri);
                        }
                    }
                    Feature feature = new Feature();
                    feature.uri = propertyUri;
                    feature.label = text.toString();
                    features.add(feature);
                }
            }
            for (Feature feature : features) {
                document.add(new TextField(feature.uri, NLPUtils.tokenize(feature.uri, NLPUtils.tokenize(feature.label, NLPUtils.Filter.URI_PATH, NLPUtils.Filter.ACCENT, NLPUtils.Filter.UNDERSCORE, NLPUtils.Filter.YAGO), this.analyzer), Field.Store.NO));
            }
            for (String resource : resources) {
                document.add(new StringField("RESOURCE", resource, Field.Store.NO));
            }
            for (String typeUri : types) {
                document.add(new StringField("TYPE", typeUri, Field.Store.NO));
            }
            String label = entity.getLabel();
            int labelTokens = 0;
            if (label != null) {
                labelTokens = NLPUtils.tokens(label, NLPUtils.Filter.URI_PATH, NLPUtils.Filter.ACCENT, NLPUtils.Filter.UNDERSCORE, NLPUtils.Filter.YAGO).length;
            }
            document.add(new StringField("TYPE", type, Field.Store.YES));

            // DATA is stored for retrieval only; it is never searched.
            FieldType storedOnly = new FieldType();
            storedOnly.setIndexOptions(IndexOptions.NONE);
            storedOnly.setStored(true);
            document.add(new Field("DATA", SerializationUtils.write(entity), storedOnly));
            document.add(new StringField("URI", uri, Field.Store.YES));
            document.add(new NumericDocValuesField("ENTITY_WEIGHT", resources.size()));
            document.add(new NumericDocValuesField("LABEL_WEIGHT", labelTokens));
            document.add(new NumericDocValuesField("ENTITY_RANK_FIELD", rankOf(type, resources.size())));
            this.writer.addDocument(document);
            return true;
        } catch (IOException e) {
            logger.error("Error processing entity " + entity.getURI(), e);
            return true;
        }
    }

    /**
     * Tells whether the entity may be indexed according to {@link #typeFilter}.
     *
     * @param entity entity whose rdf:type statements are checked
     * @return {@code false} as soon as one rdf:type object is rejected by the filter;
     *         {@code true} when no filter is set, the entity has no type statements, or
     *         every type object is accepted
     */
    private boolean passesTypeFilter(Entity entity) {
        List<Property> typeProperties = entity.getProperties(RDF_TYPE_PROPERTY);
        if (this.typeFilter == null || typeProperties == null) {
            return true;
        }
        for (Property typeProperty : typeProperties) {
            // A type statement without an object cannot be evaluated; skip it instead of failing.
            if (typeProperty.getObject() == null) {
                continue;
            }
            if (!this.typeFilter.evalute(typeProperty.getObject().getURI())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Determines the coarse type stored for the entity: the result of {@link #getType} on the
     * rdf:type statements, else on the rdfs:subClassOf statements, else {@code "ENTITY"}.
     */
    private String resolveType(Entity entity) {
        String type = getType(RDF_TYPE_PROPERTY, entity);
        if (type == null) {
            type = getType(RDFS_SUBCLASSOF_PROPERTY, entity);
        }
        return type != null ? type : TYPE_ENTITY;
    }

    /**
     * Computes the value of {@code ENTITY_RANK_FIELD}: the resource count multiplied by 10000
     * for plain entities, by 1 for properties and by 100 for every other type.
     */
    private static long rankOf(String type, int resourceCount) {
        long rank = resourceCount;
        if (type.equals(TYPE_ENTITY)) {
            rank *= 10000;
        } else if (!type.equals(TYPE_PROPERTY)) {
            rank *= 100;
        }
        return rank;
    }

    /**
     * Classifies the entity by the objects of the given property.
     *
     * <p>The statements are inspected in order; for each one the property, class and ontology
     * selectors are tried in that sequence and the first match wins. Statements without an
     * object are ignored.
     *
     * @param str    URI of the property whose objects are inspected
     * @param entity entity to classify
     * @return {@code "PROPERTY"}, {@code "CLASS"} or {@code "ONTOLOGY"}, or {@code null} when
     *         the entity has no such property or no selector matches
     */
    public String getType(String str, Entity entity) {
        List<Property> properties = entity.getProperties(str);
        if (properties == null) {
            return null;
        }
        for (Property property : properties) {
            if (property.getObject() == null) {
                continue;
            }
            String objectUri = property.getObject().getURI();
            if (this.propertySelector != null && this.propertySelector.evalute(objectUri)) {
                return TYPE_PROPERTY;
            }
            if (this.classSelector != null && this.classSelector.evalute(objectUri)) {
                return TYPE_CLASS;
            }
            if (this.ontologySelector != null && this.ontologySelector.evalute(objectUri)) {
                return TYPE_ONTOLOGY;
            }
        }
        return null;
    }
}
