001 package nl.tudelft.tbm.eeni.owlstructure.processor;
002
003 import com.hp.hpl.jena.ontology.OntClass;
004 import com.hp.hpl.jena.ontology.OntModel;
005 import com.hp.hpl.jena.ontology.OntProperty;
006 import com.hp.hpl.jena.query.*;
007 import com.hp.hpl.jena.rdf.model.Resource;
008 import com.hp.hpl.jena.shared.Lock;
009 import nl.tudelft.tbm.eeni.owlstructure.utils.CollectionUtils;
010 import nl.tudelft.tbm.eeni.owlstructure.utils.OntologyUtils;
011 import org.apache.commons.logging.Log;
012 import org.apache.commons.logging.LogFactory;
013
014 import java.util.Collection;
015 import java.util.HashSet;
016 import java.util.Iterator;
017
018 /**
019 * Set or extend property ranges by looking at instances in a given ontology
020 */
021 public class PropertyRangeInferer implements IOntologyProcessor {
022
023 static Log log = LogFactory.getLog(FunctionalPropertyInferer.class);
024
025 private boolean keepExistingRanges;
026 private boolean allowThingRange;
027
028 /**
029 * Creates a new property range inferer, that infers property ranges by
030 * looking at instances in the ontology.
031 *
032 * @param keepExistingRanges Whether to keep ranges already defined in the ontology.
033 * An existing owl:Thing range will still be removed when
034 * keepExistingRanges is set to false. Defaults to true.
035 * @param allowThingRange Whether the range may contain the class owl:Thing. Defaults to false.
036 */
037 public PropertyRangeInferer(boolean keepExistingRanges, boolean allowThingRange) {
038 this.keepExistingRanges = keepExistingRanges;
039 this.allowThingRange = allowThingRange;
040 }
041
042 /**
043 * Creates a new property range inferer, that infers property ranges by
044 * looking at instances in the ontology.
045 *
046 * @param keepExistingRanges Whether to keep ranges already defined in the ontology.
047 * An existing owl:Thing range will still be removed when
048 * keepExistingRanges is set to false. Defaults to true.
049 */
050 public PropertyRangeInferer(boolean keepExistingRanges) {
051 this(keepExistingRanges, false);
052 }
053
054 /**
055 * Creates a new property range inferer, that infers property ranges by
056 * looking at instances in the ontology.
057 */
058 public PropertyRangeInferer() {
059 this(true, false);
060 }
061
062 /**
063 * Run the propery range inferer on all classes in the given ontology
064 *
065 * @param ontModel The ontology model to work on
066 */
067 @Override
068 public OntModel process(OntModel ontModel) {
069 // Loop over all properties
070 Collection<OntProperty> properties = ontModel.listAllOntProperties().toList();
071 for (OntProperty property : properties) {
072 // Find existing ranges
073 Collection<Resource> oldRanges = new HashSet<Resource>(property.listRange().toList());
074 // Find what classes/datatypes this property is used to refer to
075 Collection<Resource> newRanges = findPropertyRanges(ontModel, property);
076
077 // Remove existing ranges from property (retained ranges will be included in newRanges and thus re-added)
078 for (Resource range : oldRanges) {
079 property.removeRange(range);
080 }
081 // Add new ranges to property
082 for (Resource range : newRanges) {
083 property.addRange(range);
084 }
085
086 // Debug output
087 log.info("Property range inference for property: " + property.getLocalName() + "\n"
088 + getLogMessage("retaining range(s)", CollectionUtils.intersectCollections(oldRanges, newRanges)) + "\n"
089 + getLogMessage("adding range(s)", CollectionUtils.subtractCollections(newRanges, oldRanges)) + "\n"
090 + getLogMessage("removing range(s)", CollectionUtils.subtractCollections(oldRanges, newRanges)));
091 }
092
093 return ontModel;
094 }
095
096 /**
097 * Given a certain property, list what datatypes (boolean, double, string, etc)
098 * or classes are used for the values that instances refer to using this property.
099 */
100 private Collection<Resource> findPropertyRanges(OntModel ontModel, OntProperty property) {
101 // Use separate lists to store datatypes and classes in the property range
102 HashSet<Resource> rangeDatatypes = new HashSet<Resource>();
103 HashSet<OntClass> rangeClasses = new HashSet<OntClass>();
104
105 /*
106 * If keepExistingRanges is enabled, start with current ranges already assigned in the property range
107 */
108 if (this.keepExistingRanges) {
109 // Loop over all existing ranges
110 Iterator<? extends Resource> rangeIterator = property.listRange();
111 while (rangeIterator.hasNext()) {
112 Resource range = rangeIterator.next();
113 // See whether it is a class or not
114 if (range.canAs(OntClass.class)) {
115 // It is a class
116 rangeClasses.add(range.as(OntClass.class));
117 } else {
118 // It is a datatype (or maybe something else?)
119 rangeDatatypes.add(range);
120 }
121 }
122 }
123
124
125 /*
126 * Search ontology instances to find out what this property is used for in practice, and then
127 * - add all found literal types to rangeDatatypes;
128 * - add those classes found to rangeClasses that are the at least once the
129 * most specific class of an instance that this property refers to;
130 */
131 String queryString = OntologyUtils.getSparqlPrefixes(ontModel)
132 + "select distinct ?class (datatype(?o) as ?datatype) "
133 + "where { "
134 + " ?s <" + property.getURI() + "> ?o . "
135 + " optional { ?o rdf:type ?class } . "
136 + "} ";
137 ontModel.enterCriticalSection(Lock.READ);
138 try {
139 Query query = QueryFactory.create(queryString, Syntax.syntaxARQ);
140 QueryExecution qexec = QueryExecutionFactory.create(query, ontModel);
141
142 ResultSet results = qexec.execSelect();
143 while (results.hasNext()) {
144 QuerySolution result = results.nextSolution();
145
146 // See what type the property refers to
147 if (result.contains("datatype")) {
148 // Found this property with a a typed literal value
149 rangeDatatypes.add(result.get("datatype").as(Resource.class));
150 } else if (result.contains("class")) {
151 /*
152 * Found this property referring to another instance of a certain class
153 * We want to make sure that the property refers to a direct instance of the found class,
154 * and not an instance of a *subclass* of the found class
155 */
156 OntClass ontClass = result.get("class").as(OntClass.class);
157 if (anyInstanceRefersToDirectClassInstance(ontModel, property, ontClass)) {
158 // It does refer to a direct instance of this class, so add it
159 rangeClasses.add(ontClass);
160 }
161 } else {
162 // It refers to something else, we can't handle this right now
163 // @TODO how to deal with unknown-class URIs / plain literals / blank nodes?
164 }
165 }
166 qexec.close();
167 } finally {
168 ontModel.leaveCriticalSection();
169 }
170
171 /*
172 * If owl:Thing is not allowed to be used as range, remove it from the range classes list
173 */
174 if (!allowThingRange) {
175 rangeClasses.remove(OntologyUtils.getOwlThing(ontModel));
176 }
177
178 /*
179 * For each class found to be in range of this property, filter out all subclasses
180 * that have this class in its inheritance chain.
181 * Subclasses that have multiple (indirect) superclasses are retained,
182 * unless *all* of its ancestry branches are found within the property range.
183 *
184 */
185 for (OntClass ontClass : rangeClasses.toArray(new OntClass[]{})) {
186 if (OntologyUtils.containsCompleteClassSuperset(ontClass, rangeClasses)) {
187 rangeClasses.remove(ontClass);
188 }
189 }
190
191 // Combine rangeClasses and rangeDatatypes into the final range list
192 HashSet<Resource> ranges = new HashSet<Resource>();
193 ranges.addAll(rangeDatatypes);
194 ranges.addAll(rangeClasses);
195
196 // Return the final range list
197 return ranges;
198 }
199
200 /**
201 * Find out whether there is instance that uses this property to refer to a *direct* instance of this class.
202 * A direct instance is an instance that has a certain class *but not* any of its subclasses.
203 */
204 private boolean anyInstanceRefersToDirectClassInstance(OntModel ontModel, OntProperty property, OntClass ontClass) {
205 // Find all descendants of this class that we want to exclude
206 Collection<OntClass> descendants = OntologyUtils.listClassDescendants(ontClass);
207
208 if (descendants.isEmpty()) {
209 // If the class has no descendants at all, all instances must be direct instances
210 return true;
211
212 } else {
213 /*
214 * If the class does have descendants, find instances referred to by the property that
215 * are an instance of the given class but not an instance of any of its descendants.
216 */
217 String queryString = OntologyUtils.getSparqlPrefixes(ontModel)
218 + "select (count(?s) as ?count) "
219 + "where { "
220 + " ?s <" + property.getURI() + "> ?o . "
221 + " ?o rdf:type <" + ontClass.getURI() + "> . ";
222 for (OntClass descendant : descendants) {
223 queryString += " unsaid { ?o rdf:type <" + descendant.getURI() + "> } . ";
224 }
225 queryString += "} ";
226
227 ontModel.enterCriticalSection(Lock.READ);
228 int instanceCount;
229 try {
230 Query query = QueryFactory.create(queryString, Syntax.syntaxARQ);
231 QueryExecution qexec = QueryExecutionFactory.create(query, ontModel);
232
233 ResultSet results = qexec.execSelect();
234 if (results.hasNext()) {
235 instanceCount = results.nextSolution().getLiteral("count").getInt();
236 } else {
237 instanceCount = 0;
238 }
239 OntologyUtils.closeIterator(results);
240 qexec.close();
241 } finally {
242 ontModel.leaveCriticalSection();
243 }
244
245 if (instanceCount > 0) {
246 // We did find any direct class instances referred to using property
247 return true;
248 } else {
249 // We didn't find any of those instances
250 return false;
251 }
252 }
253 }
254
255 /**
256 * Format a debug message containing a message and a list of resource localNames
257 */
258 private String getLogMessage(String message, Collection<? extends Resource> resources) {
259 String result = " - " + message + ": ";
260 if (resources.size() > 0) {
261 int counter = 0;
262 for (Resource resource : resources) {
263 result += (counter++ > 0 ? ", " : "") + resource.getLocalName();
264 }
265 } else {
266 result += "none";
267 }
268 return result;
269 }
270 }