001    package nl.tudelft.tbm.eeni.owlstructure.processor;
002    
003    import com.hp.hpl.jena.ontology.OntClass;
004    import com.hp.hpl.jena.ontology.OntModel;
005    import com.hp.hpl.jena.ontology.OntProperty;
006    import com.hp.hpl.jena.query.*;
007    import com.hp.hpl.jena.rdf.model.Resource;
008    import com.hp.hpl.jena.shared.Lock;
009    import nl.tudelft.tbm.eeni.owlstructure.utils.CollectionUtils;
010    import nl.tudelft.tbm.eeni.owlstructure.utils.OntologyUtils;
011    import org.apache.commons.logging.Log;
012    import org.apache.commons.logging.LogFactory;
013    
014    import java.util.Collection;
015    import java.util.HashSet;
016    import java.util.Iterator;
017    
018    /**
019     * Set or extend property ranges by looking at instances in a given ontology
020     */
021    public class PropertyRangeInferer implements IOntologyProcessor {
022    
023        static Log log = LogFactory.getLog(FunctionalPropertyInferer.class);
024    
025        private boolean keepExistingRanges;
026        private boolean allowThingRange;
027    
028        /**
029         * Creates a new property range inferer, that infers property ranges by
030         * looking at instances in the ontology.
031         *
032         * @param keepExistingRanges Whether to keep ranges already defined in the ontology.
033         *                           An existing owl:Thing range will still be removed when
034         *                           keepExistingRanges is set to false. Defaults to true.
035         * @param allowThingRange    Whether the range may contain the class owl:Thing. Defaults to false.
036         */
037        public PropertyRangeInferer(boolean keepExistingRanges, boolean allowThingRange) {
038            this.keepExistingRanges = keepExistingRanges;
039            this.allowThingRange = allowThingRange;
040        }
041    
042        /**
043         * Creates a new property range inferer, that infers property ranges by
044         * looking at instances in the ontology.
045         *
046         * @param keepExistingRanges Whether to keep ranges already defined in the ontology.
047         *                           An existing owl:Thing range will still be removed when
048         *                           keepExistingRanges is set to false. Defaults to true.
049         */
050        public PropertyRangeInferer(boolean keepExistingRanges) {
051            this(keepExistingRanges, false);
052        }
053    
054        /**
055         * Creates a new property range inferer, that infers property ranges by
056         * looking at instances in the ontology.
057         */
058        public PropertyRangeInferer() {
059            this(true, false);
060        }
061    
062        /**
063         * Run the propery range inferer on all classes in the given ontology
064         *
065         * @param ontModel The ontology model to work on
066         */
067        @Override
068        public OntModel process(OntModel ontModel) {
069            // Loop over all properties
070            Collection<OntProperty> properties = ontModel.listAllOntProperties().toList();
071            for (OntProperty property : properties) {
072                // Find existing ranges
073                Collection<Resource> oldRanges = new HashSet<Resource>(property.listRange().toList());
074                // Find what classes/datatypes this property is used to refer to
075                Collection<Resource> newRanges = findPropertyRanges(ontModel, property);
076    
077                // Remove existing ranges from property (retained ranges will be included in newRanges and thus re-added)
078                for (Resource range : oldRanges) {
079                    property.removeRange(range);
080                }
081                // Add new ranges to property
082                for (Resource range : newRanges) {
083                    property.addRange(range);
084                }
085    
086                // Debug output
087                log.info("Property range inference for property: " + property.getLocalName() + "\n"
088                        + getLogMessage("retaining range(s)", CollectionUtils.intersectCollections(oldRanges, newRanges)) + "\n"
089                        + getLogMessage("adding range(s)", CollectionUtils.subtractCollections(newRanges, oldRanges)) + "\n"
090                        + getLogMessage("removing range(s)", CollectionUtils.subtractCollections(oldRanges, newRanges)));
091            }
092    
093            return ontModel;
094        }
095    
096        /**
097         * Given a certain property, list what datatypes (boolean, double, string, etc)
098         * or classes are used for the values that instances refer to using this property.
099         */
100        private Collection<Resource> findPropertyRanges(OntModel ontModel, OntProperty property) {
101            // Use separate lists to store datatypes and classes in the property range
102            HashSet<Resource> rangeDatatypes = new HashSet<Resource>();
103            HashSet<OntClass> rangeClasses = new HashSet<OntClass>();
104    
105            /*
106               * If keepExistingRanges is enabled, start with current ranges already assigned in the property range
107               */
108            if (this.keepExistingRanges) {
109                // Loop over all existing ranges
110                Iterator<? extends Resource> rangeIterator = property.listRange();
111                while (rangeIterator.hasNext()) {
112                    Resource range = rangeIterator.next();
113                    // See whether it is a class or not
114                    if (range.canAs(OntClass.class)) {
115                        // It is a class
116                        rangeClasses.add(range.as(OntClass.class));
117                    } else {
118                        // It is a datatype (or maybe something else?)
119                        rangeDatatypes.add(range);
120                    }
121                }
122            }
123    
124    
125            /*
126               * Search ontology instances to find out what this property is used for in practice, and then
127               * - add all found literal types to rangeDatatypes;
128               * - add those classes found to rangeClasses that are the at least once the
129               *   most specific class of an instance that this property refers to;
130               */
131            String queryString = OntologyUtils.getSparqlPrefixes(ontModel)
132                    + "select distinct ?class (datatype(?o) as ?datatype) "
133                    + "where { "
134                    + "  ?s <" + property.getURI() + "> ?o . "
135                    + "  optional { ?o rdf:type ?class } . "
136                    + "} ";
137            ontModel.enterCriticalSection(Lock.READ);
138            try {
139                Query query = QueryFactory.create(queryString, Syntax.syntaxARQ);
140                QueryExecution qexec = QueryExecutionFactory.create(query, ontModel);
141    
142                ResultSet results = qexec.execSelect();
143                while (results.hasNext()) {
144                    QuerySolution result = results.nextSolution();
145    
146                    // See what type the property refers to
147                    if (result.contains("datatype")) {
148                        // Found this property with a a typed literal value
149                        rangeDatatypes.add(result.get("datatype").as(Resource.class));
150                    } else if (result.contains("class")) {
151                        /*
152                                  *  Found this property referring to another instance of a certain class
153                                  *  We want to make sure that the property refers to a direct instance of the found class,
154                                  *  and not an instance of a *subclass* of the found class
155                                  */
156                        OntClass ontClass = result.get("class").as(OntClass.class);
157                        if (anyInstanceRefersToDirectClassInstance(ontModel, property, ontClass)) {
158                            // It does refer to a direct instance of this class, so add it
159                            rangeClasses.add(ontClass);
160                        }
161                    } else {
162                        // It refers to something else, we can't handle this right now
163                        // @TODO how to deal with unknown-class URIs / plain literals / blank nodes?
164                    }
165                }
166                qexec.close();
167            } finally {
168                ontModel.leaveCriticalSection();
169            }
170    
171            /*
172               * If owl:Thing is not allowed to be used as range, remove it from the range classes list
173               */
174            if (!allowThingRange) {
175                rangeClasses.remove(OntologyUtils.getOwlThing(ontModel));
176            }
177    
178            /*
179               * For each class found to be in range of this property, filter out all subclasses
180               * that have this class in its inheritance chain.
181               * Subclasses that have multiple (indirect) superclasses are retained,
182               * unless *all* of its ancestry branches are found within the property range.
183               *
184               */
185            for (OntClass ontClass : rangeClasses.toArray(new OntClass[]{})) {
186                if (OntologyUtils.containsCompleteClassSuperset(ontClass, rangeClasses)) {
187                    rangeClasses.remove(ontClass);
188                }
189            }
190    
191            // Combine rangeClasses and rangeDatatypes into the final range list
192            HashSet<Resource> ranges = new HashSet<Resource>();
193            ranges.addAll(rangeDatatypes);
194            ranges.addAll(rangeClasses);
195    
196            // Return the final range list
197            return ranges;
198        }
199    
200        /**
201         * Find out whether there is instance that uses this property to refer to a *direct* instance of this class.
202         * A direct instance is an instance that has a certain class *but not* any of its subclasses.
203         */
204        private boolean anyInstanceRefersToDirectClassInstance(OntModel ontModel, OntProperty property, OntClass ontClass) {
205            // Find all descendants of this class that we want to exclude
206            Collection<OntClass> descendants = OntologyUtils.listClassDescendants(ontClass);
207    
208            if (descendants.isEmpty()) {
209                // If the class has no descendants at all, all instances must be direct instances
210                return true;
211    
212            } else {
213                /*
214                    *  If the class does have descendants, find instances referred to by the property that
215                    *  are an instance of the given class but not an instance of any of its descendants.
216                    */
217                String queryString = OntologyUtils.getSparqlPrefixes(ontModel)
218                        + "select (count(?s) as ?count) "
219                        + "where { "
220                        + "  ?s <" + property.getURI() + "> ?o . "
221                        + "  ?o rdf:type <" + ontClass.getURI() + "> . ";
222                for (OntClass descendant : descendants) {
223                    queryString += "  unsaid { ?o rdf:type <" + descendant.getURI() + "> } . ";
224                }
225                queryString += "} ";
226    
227                ontModel.enterCriticalSection(Lock.READ);
228                int instanceCount;
229                try {
230                    Query query = QueryFactory.create(queryString, Syntax.syntaxARQ);
231                    QueryExecution qexec = QueryExecutionFactory.create(query, ontModel);
232    
233                    ResultSet results = qexec.execSelect();
234                    if (results.hasNext()) {
235                        instanceCount = results.nextSolution().getLiteral("count").getInt();
236                    } else {
237                        instanceCount = 0;
238                    }
239                    OntologyUtils.closeIterator(results);
240                    qexec.close();
241                } finally {
242                    ontModel.leaveCriticalSection();
243                }
244    
245                if (instanceCount > 0) {
246                    // We did find any direct class instances referred to using property
247                    return true;
248                } else {
249                    // We didn't find any of those instances
250                    return false;
251                }
252            }
253        }
254    
255        /**
256         * Format a debug message containing a message and a list of resource localNames
257         */
258        private String getLogMessage(String message, Collection<? extends Resource> resources) {
259            String result = "  - " + message + ": ";
260            if (resources.size() > 0) {
261                int counter = 0;
262                for (Resource resource : resources) {
263                    result += (counter++ > 0 ? ", " : "") + resource.getLocalName();
264                }
265            } else {
266                result += "none";
267            }
268            return result;
269        }
270    }