001    package nl.tudelft.tbm.eeni.owlstructure.processor;
002    
003    import com.hp.hpl.jena.ontology.OntClass;
004    import com.hp.hpl.jena.ontology.OntModel;
005    import com.hp.hpl.jena.ontology.OntProperty;
006    import com.hp.hpl.jena.ontology.OntResource;
007    import com.hp.hpl.jena.query.*;
008    import com.hp.hpl.jena.rdf.model.Resource;
009    import com.hp.hpl.jena.shared.Lock;
010    import nl.tudelft.tbm.eeni.owlstructure.utils.CollectionUtils;
011    import nl.tudelft.tbm.eeni.owlstructure.utils.OntologyUtils;
012    import org.apache.commons.logging.Log;
013    import org.apache.commons.logging.LogFactory;
014    
015    import java.util.Collection;
016    import java.util.HashSet;
017    import java.util.Iterator;
018    import java.util.Set;
019    
020    /**
021     * Set or extend property domains by looking at instances in a given ontology
022     */
023    public class PropertyDomainInferer implements IOntologyProcessor {
024    
025        static Log log = LogFactory.getLog(FunctionalPropertyInferer.class);
026    
027        private boolean keepExistingDomains;
028        private boolean enableGeneralizion;
029        private boolean allowThingDomain;
030    
031        /**
032         * Creates a new property domain inferer, that infers property domains by
033         * looking at instances in the ontology.
034         *
035         * @param keepExistingDomains  Whether to keep domains already defined in the ontology. An
036         *                             existing domain may still be removed when the generalizer
037         *                             decides to add a superclass of the existing domain class to
038         *                             the domain. An existing owl:Thing domain will be removed when
039         *                             allowThingDomain is set to false. Defaults to true.
040         * @param enableGeneralization Whether to use the generalization engine is used to find
041         *                             abstract superclasses of domain classes. Defaults to true.
042         * @param allowThingDomain     Whether the domain may contain owl:Thing. Defaults to false.
043         */
044        public PropertyDomainInferer(boolean keepExistingDomains, boolean enableGeneralization, boolean allowThingDomain) {
045            this.keepExistingDomains = keepExistingDomains;
046            this.enableGeneralizion = enableGeneralization;
047            this.allowThingDomain = allowThingDomain;
048        }
049    
050        /**
051         * Creates a new property domain inferer, that infers property domains by
052         * looking at instances in the ontology.
053         *
054         * @param keepExistingDomains  Whether to keep domains already defined in the ontology. An
055         *                             existing domain may still be removed when the generalizer
056         *                             decides to add a superclass of the existing domain class to
057         *                             the domain. An existing owl:Thing domain will be removed when
058         *                             allowThingDomain is set to false. Defaults to true.
059         * @param enableGeneralization Whether to use the generalization engine is used to find
060         *                             abstract superclasses of domain classes. Defaults to true.
061         */
062        public PropertyDomainInferer(boolean keepExistingDomains, boolean enableGeneralization) {
063            this(keepExistingDomains, enableGeneralization, false);
064        }
065    
066        /**
067         * Creates a new property domain inferer, that infers property domains by
068         * looking at instances in the ontology.
069         *
070         * @param keepExistingDomains Whether to keep domains already defined in the ontology. An
071         *                            existing domain may still be removed when the generalizer
072         *                            decides to add a superclass of the existing domain class to
073         *                            the domain. An existing owl:Thing domain will be removed when
074         *                            allowThingDomain is set to false. Defaults to true.
075         */
076        public PropertyDomainInferer(boolean keepExistingDomains) {
077            this(keepExistingDomains, true);
078        }
079    
080        /**
081         * Creates a new property domain inferer, that infers property domains by
082         * looking at instances in the ontology.
083         */
084        public PropertyDomainInferer() {
085            this(true);
086        }
087    
088        /**
089         * Run the property domain inferer on all properties in the given ontology.
090         *
091         * @param ontModel The ontModel instance to work on.
092         */
093        @Override
094        public OntModel process(OntModel ontModel) {
095            // Loop over all properties
096            Collection<OntProperty> properties = ontModel.listAllOntProperties().toList();
097            for (OntProperty property : properties) {
098                // Find existing domains
099                Collection<? extends OntResource> oldDomains = property.listDomain().toList();
100                // Find classes that use this property
101                Collection<OntClass> newDomains = findPropertyDomains(ontModel, property);
102    
103                // Remove existing domains from property (retained domains will be included in newDomains and thus re-added)
104                for (OntResource domainClass : oldDomains) {
105                    property.removeDomain(domainClass);
106                }
107                // Add new domains to property
108                for (OntResource domainClass : newDomains) {
109                    property.addDomain(domainClass);
110                }
111    
112                // Debug output
113                log.info("Property domain inference for property: " + property.getLocalName() + " \n"
114                        + getLogMessage("retaining domain(s)", CollectionUtils.intersectCollections(oldDomains, newDomains)) + "\n"
115                        + getLogMessage("adding domain(s)", CollectionUtils.subtractCollections(newDomains, oldDomains)) + "\n"
116                        + getLogMessage("removing domain(s)", CollectionUtils.subtractCollections(oldDomains, newDomains)));
117            }
118    
119            return ontModel;
120        }
121    
122        /**
123         * Find the classes of things that use this property
124         */
125        private Collection<OntClass> findPropertyDomains(OntModel ontModel, OntProperty property) {
126            HashSet<OntClass> domainClasses = new HashSet<OntClass>();
127    
128            /*
129               * If keepExistingDomains is enabled, start with classes that are already assigned to this class's domain
130               */
131            if (keepExistingDomains) {
132                Iterator<? extends OntResource> domainIterator = property.listDomain();
133                while (domainIterator.hasNext()) {
134                    domainClasses.add(domainIterator.next().as(OntClass.class));
135                }
136            }
137    
138            /*
139               * Look for classes that use the given property.
140               * For all classes found, check whether this class uses the property directly;
141               * e.g. there are instances of this class and not of any subclass of it that use this property.
142               */
143            String queryString = OntologyUtils.getSparqlPrefixes(ontModel)
144                    + "select distinct ?domain "
145                    + "where { "
146                    + "  ?s <" + property.getURI() + "> ?o . "
147                    + "  ?s rdf:type ?domain . "
148                    + "} ";
149            ontModel.enterCriticalSection(Lock.READ);
150            try {
151                Query query = QueryFactory.create(queryString);
152                QueryExecution qexec = QueryExecutionFactory.create(query, ontModel);
153    
154                ResultSet results = qexec.execSelect();
155                while (results.hasNext()) {
156                    OntClass domainClass = results.nextSolution().get("domain").as(OntClass.class);
157    
158                    // See whether there are instances of ontClass that are not an instance of a subclass that use this property
159                    if (someDirectInstancesHaveProperty(ontModel, property, domainClass)) {
160                        domainClasses.add(domainClass.as(OntClass.class));
161                    }
162                }
163                qexec.close();
164            } finally {
165                ontModel.leaveCriticalSection();
166            }
167    
168            /*
169               * Look for abstract superclasses that should be in the domain of this property.
170               * These are classes that satisfy these requirements:
171               *   (a) it must have multiple disjoint subclasses,
172               *       e.g. subclasses that are not an indirect subclass of one another;
173               *   (b) all of its (indirect) instances either
174               *       (1) have this property, or
175               *       (2) have a superclass that already has the property in its domain
176               */
177            if (this.enableGeneralizion) {
178                Set<OntClass> allClasses = ontModel.listClasses().toSet();
179                for (OntClass ontClass : allClasses) {
180                    /*
181                         * Look if this class has at least two *disjoint* indirect subclasses that use this property.
182                         * Multiple classes that are in the same inheritance branch (i.e. one class is (indirect) parent to the other) count as one.
183                         */
184                    Set<OntClass> branches = new HashSet<OntClass>(OntologyUtils.listClassDescendants(ontClass));
185                    branches.retainAll(domainClasses);
186                    Iterator<OntClass> branchIterator = domainClasses.iterator();
187                    while (branchIterator.hasNext()) {
188                        OntClass branch = branchIterator.next();
189                        if (OntologyUtils.containsClassSuperset(branch, branches)) {
190                            branchIterator.remove();
191                        }
192                    }
193                    if (branches.size() > 1) {
194                        /*
195                              *  Check if *all* indirect instances of this class have the property,
196                              *  but ignore subclasses that already use this property for sure because
197                              *  at least some *direct* instances of that subclass are in the property's domain.
198                              */
199                        if (allIndirectInstancesHaveProperty(ontModel, property, ontClass, domainClasses)) {
200                            // All conditions are satisfied.
201                            domainClasses.add(ontClass);
202                        }
203                    }
204    
205                }
206            }
207    
208            /*
209               * If owl:Thing is not allowed to be in the domain, remove it
210               */
211            if (!this.allowThingDomain) {
212                domainClasses.remove(OntologyUtils.getOwlThing(ontModel));
213            }
214    
215            /*
216               * Remove classes that have at least one superclass that is also in the domain of this property,
217               * because then the inclusion of the subclass is implied by the inclusion of the superclass.
218               */
219            Iterator<OntClass> classIterator = domainClasses.iterator();
220            while (classIterator.hasNext()) {
221                OntClass domainClass = classIterator.next();
222                if (OntologyUtils.containsClassSuperset(domainClass, domainClasses)) {
223                    classIterator.remove();
224                }
225            }
226    
227            return domainClasses;
228        }
229    
230        private boolean someDirectInstancesHaveProperty(OntModel ontModel, OntProperty property, OntClass ontClass) {
231            // Find all descendants of this class
232            Collection<OntClass> descendants = OntologyUtils.listClassDescendants(ontClass);
233    
234            // Look for instances of this class that have this property, but that are not also of a type that is a subclass of this class.
235            String queryString = OntologyUtils.getSparqlPrefixes(ontModel)
236                    + "select (count(?o) as ?count) "
237                    + "where { "
238                    + "  ?s <" + property.getURI() + "> ?o . "
239                    + "  ?s rdf:type <" + ontClass.getURI() + "> . ";
240            for (OntClass descendant : descendants) {
241                queryString += "  unsaid { ?s rdf:type <" + descendant.getURI() + "> } . ";
242            }
243            queryString += "} ";
244    
245            ontModel.enterCriticalSection(Lock.READ);
246            int instanceCount;
247            try {
248                Query query = QueryFactory.create(queryString, Syntax.syntaxARQ);
249                QueryExecution qexec = QueryExecutionFactory.create(query, ontModel);
250    
251                ResultSet results = qexec.execSelect();
252                if (results.hasNext()) {
253                    instanceCount = results.nextSolution().getLiteral("count").getInt();
254                } else {
255                    instanceCount = 0;
256                }
257                OntologyUtils.closeIterator(results);
258                qexec.close();
259            } finally {
260                ontModel.leaveCriticalSection();
261            }
262    
263            // If any instances, then there must be at least one direct instance that uses this property.
264            if (instanceCount > 0) {
265                return true;
266            } else {
267                return false;
268            }
269        }
270    
271        private boolean allIndirectInstancesHaveProperty(OntModel ontModel, OntProperty property, OntClass ontClass, Collection<OntClass> ignoreBranches) {
272            // Find all classes in ignored branches
273            Set<OntClass> ignoreClasses = new HashSet<OntClass>(ignoreBranches);
274            ignoreClasses.retainAll(OntologyUtils.listClassDescendants(ontClass));
275            for (OntClass ignoreClass : ignoreBranches) {
276                ignoreClasses.add(ignoreClass);
277                ignoreClasses.addAll(OntologyUtils.listClassDescendants(ignoreClass));
278            }
279    
280            // Find all non-ignored descendants of this class
281            Collection<OntClass> includeClasses = OntologyUtils.listClassDescendants(ontClass);
282            includeClasses.removeAll(ignoreClasses);
283    
284            // Look for indirect instances that don't have this property, but are not of a class in one of the ignored branches.
285            String queryString = OntologyUtils.getSparqlPrefixes(ontModel)
286                    + "select (count(?o) as ?count) "
287                    + "where { "
288                    + "  ?o rdf:type ?t . "
289                    + "  unsaid { ?s <" + property.getURI() + "> ?o } . ";
290            for (OntClass descendant : ignoreClasses) {
291                queryString +=
292                        "  unsaid { ?o rdf:type <" + descendant.getURI() + "> } . ";
293            }
294            queryString +=
295                    "  filter ( "
296                            + "       ?t = <" + ontClass.getURI() + "> ";
297            for (OntClass descendant : includeClasses) {
298                queryString +=
299                        "    || ?t = <" + descendant.getURI() + "> ";
300            }
301            queryString +=
302                    "  ) . "
303                            + "} ";
304    
305            ontModel.enterCriticalSection(Lock.READ);
306            int instanceCount;
307            try {
308                Query query = QueryFactory.create(queryString, Syntax.syntaxARQ);
309                QueryExecution qexec = QueryExecutionFactory.create(query, ontModel);
310    
311                ResultSet results = qexec.execSelect();
312                if (results.hasNext()) {
313                    instanceCount = results.nextSolution().getLiteral("count").getInt();
314                } else {
315                    instanceCount = 0;
316                }
317                OntologyUtils.closeIterator(results);
318                qexec.close();
319            } finally {
320                ontModel.leaveCriticalSection();
321            }
322    
323            // If at least one instance was found, then at least one non-ignored indirect instance of this class does not have this property.
324            if (instanceCount == 0) {
325                return true;
326            } else {
327                return false;
328            }
329        }
330    
331        /**
332         * Format a debug message containing a message and a list of resource localNames
333         */
334        private String getLogMessage(String message, Collection<? extends Resource> resources) {
335            String result = "  - " + message + ": ";
336            if (resources.size() > 0) {
337                int counter = 0;
338                for (Resource resource : resources) {
339                    result += (counter++ > 0 ? ", " : "") + resource.getLocalName();
340                }
341            } else {
342                result += "none";
343            }
344            return result;
345        }
346    }