001 /*
002 * Utils.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Johann Petrak, 2010-02-05
013 *
014 * $Id: Main.java 12006 2009-12-01 17:24:28Z thomas_heitz $
015 */
016
017 package gate;
018
019 import gate.annotation.AnnotationSetImpl;
020 import gate.annotation.ImmutableAnnotationSetImpl;
021 import gate.util.GateRuntimeException;
022 import gate.util.OffsetComparator;
023 import java.util.ArrayList;
024 import java.util.Collections;
025 import java.util.HashSet;
026 import java.util.Iterator;
027 import java.util.List;
028 import java.util.Map;
029 import java.util.Set;
030
031 import org.apache.log4j.Logger;
032 import org.apache.log4j.Level;
033
034 /**
035 * Various utility methods that make frequently needed tasks easier and
036 * require less code. In Java code (or JAPE grammars) you may wish to
037 * <code>import static gate.Utils.*</code> to access these methods without
038 * having to qualify them with a class name. In Groovy code, this class can be
039 * used as a category to inject each utility method into the class of its first
040 * argument, e.g.
041 * <pre>
042 * Document doc = // ...
043 * Annotation ann = // ...
044 * use(gate.Utils) {
045 * println "Annotation has ${ann.length()} characters"
046 * println "and covers the string \"${doc.stringFor(ann)}\""
047 * }
048 * </pre>
049 *
050 * @author Johann Petrak, Ian Roberts
051 */
052 public class Utils {
053 /**
054 * Return the length of the document content covered by an Annotation as an
055 * int -- if the content is too long for an int, the method will throw
056 * a GateRuntimeException. Use getLengthLong(SimpleAnnotation ann) if
057 * this situation could occur.
058 * @param ann the annotation for which to determine the length
059 * @return the length of the document content covered by this annotation.
060 */
061 public static int length(SimpleAnnotation ann) {
062 long len = lengthLong(ann);
063 if (len > java.lang.Integer.MAX_VALUE) {
064 throw new GateRuntimeException(
065 "Length of annotation too big to be returned as an int: "+len);
066 } else {
067 return (int)len;
068 }
069 }
070
071 /**
072 * Return the length of the document content covered by an Annotation as a
073 * long.
074 * @param ann the annotation for which to determine the length
075 * @return the length of the document content covered by this annotation.
076 */
077 public static long lengthLong(SimpleAnnotation ann) {
078 return ann.getEndNode().getOffset() -
079 ann.getStartNode().getOffset();
080 }
081
082 /**
083 * Return the length of the document as an
084 * int -- if the content is too long for an int, the method will throw a
085 * GateRuntimeException. Use getLengthLong(Document doc) if
086 * this situation could occur.
087 * @param doc the document for which to determine the length
088 * @return the length of the document content.
089 */
090 public static int length(Document doc) {
091 long len = doc.getContent().size();
092 if (len > java.lang.Integer.MAX_VALUE) {
093 throw new GateRuntimeException(
094 "Length of document too big to be returned as an int: "+len);
095 } else {
096 return (int)len;
097 }
098 }
099
100 /**
101 * Return the length of the document as a long.
102 * @param doc the document for which to determine the length
103 * @return the length of the document content.
104 */
105 public static long lengthLong(Document doc) {
106 return doc.getContent().size();
107 }
108
109 /**
110 * Return the DocumentContent corresponding to the annotation.
111 * <p>
112 * Note: the DocumentContent object returned will also contain the
113 * original content which can be accessed using the getOriginalContent()
114 * method.
115 * @param doc the document from which to extract the content
116 * @param ann the annotation for which to return the content.
117 * @return a DocumentContent representing the content spanned by the annotation.
118 */
119 public static DocumentContent contentFor(
120 SimpleDocument doc, SimpleAnnotation ann) {
121 try {
122 return doc.getContent().getContent(
123 ann.getStartNode().getOffset(),
124 ann.getEndNode().getOffset());
125 } catch(gate.util.InvalidOffsetException ex) {
126 throw new GateRuntimeException(ex.getMessage());
127 }
128 }
129
130 /**
131 * Return the document text as a String corresponding to the annotation.
132 * @param doc the document from which to extract the document text
133 * @param ann the annotation for which to return the text.
134 * @return a String representing the text content spanned by the annotation.
135 */
136 public static String stringFor(
137 Document doc, SimpleAnnotation ann) {
138 try {
139 return doc.getContent().getContent(
140 ann.getStartNode().getOffset(),
141 ann.getEndNode().getOffset()).toString();
142 } catch(gate.util.InvalidOffsetException ex) {
143 throw new GateRuntimeException(ex.getMessage());
144 }
145 }
146
147 /**
148 * Returns the document text between the provided offsets.
149 * @param doc the document from which to extract the document text
150 * @param start the start offset
151 * @param end the end offset
152 * @return document text between the provided offsets
153 */
154 public static String stringFor(
155 Document doc, Long start, Long end) {
156 try {
157 return doc.getContent().getContent(
158 start,
159 end).toString();
160 } catch(gate.util.InvalidOffsetException ex) {
161 throw new GateRuntimeException(ex.getMessage());
162 }
163 }
164
165 /**
166 * Return the DocumentContent covered by the given annotation set.
167 * <p>
168 * Note: the DocumentContent object returned will also contain the
169 * original content which can be accessed using the getOriginalContent()
170 * method.
171 * @param doc the document from which to extract the content
172 * @param anns the annotation set for which to return the content.
173 * @return a DocumentContent representing the content spanned by the
174 * annotation set.
175 */
176 public static DocumentContent contentFor(
177 SimpleDocument doc, AnnotationSet anns) {
178 try {
179 return doc.getContent().getContent(
180 anns.firstNode().getOffset(),
181 anns.lastNode().getOffset());
182 } catch(gate.util.InvalidOffsetException ex) {
183 throw new GateRuntimeException(ex.getMessage());
184 }
185 }
186
187 /**
188 * Return the document text as a String covered by the given annotation set.
189 * @param doc the document from which to extract the document text
190 * @param anns the annotation set for which to return the text.
191 * @return a String representing the text content spanned by the annotation
192 * set.
193 */
194 public static String stringFor(
195 Document doc, AnnotationSet anns) {
196 try {
197 return doc.getContent().getContent(
198 anns.firstNode().getOffset(),
199 anns.lastNode().getOffset()).toString();
200 } catch(gate.util.InvalidOffsetException ex) {
201 throw new GateRuntimeException(ex.getMessage());
202 }
203 }
204
205 /**
206 * Get the start offset of an annotation.
207 */
208 public static Long start(SimpleAnnotation a) {
209 return (a.getStartNode() == null) ? null : a.getStartNode().getOffset();
210 }
211
212 /**
213 * Get the start offset of an annotation set.
214 */
215 public static Long start(AnnotationSet as) {
216 return (as.firstNode() == null) ? null : as.firstNode().getOffset();
217 }
218
219 /**
220 * Get the start offset of a document (i.e. 0L).
221 */
222 public static Long start(SimpleDocument d) {
223 return Long.valueOf(0L);
224 }
225
226 /**
227 * Get the end offset of an annotation.
228 */
229 public static Long end(SimpleAnnotation a) {
230 return (a.getEndNode() == null) ? null : a.getEndNode().getOffset();
231 }
232
233 /**
234 * Get the end offset of an annotation set.
235 */
236 public static Long end(AnnotationSet as) {
237 return (as.lastNode() == null) ? null : as.lastNode().getOffset();
238 }
239
240 /**
241 * Get the end offset of a document.
242 */
243 public static Long end(SimpleDocument d) {
244 return d.getContent().size();
245 }
246
247 /**
248 * Return a the subset of annotations from the given annotation set
249 * that start exactly at the given offset.
250 *
251 * @param annotationSet the set of annotations from which to select
252 * @param atOffset the offset where the annoation to be returned should start
253 * @return an annotation set containing all the annotations from the original
254 * set that start at the given offset
255 */
256 public static AnnotationSet getAnnotationsAtOffset(
257 AnnotationSet annotationSet, Long atOffset) {
258 // this returns all annotations that start at this atOffset OR AFTER!
259 AnnotationSet tmp = annotationSet.get(atOffset);
260 // so lets filter ...
261 AnnotationSet ret = new AnnotationSetImpl(annotationSet.getDocument());
262 Iterator<Annotation> it = tmp.iterator();
263 while(it.hasNext()) {
264 Annotation ann = it.next();
265 if(ann.getStartNode().getOffset().equals(atOffset)) {
266 ret.add(ann);
267 }
268 }
269 return ret;
270 }
271
272 /**
273 * Get all the annotations from the source annotation set that lie within
274 * the range of the containing annotation.
275 *
276 * @param sourceAnnotationSet the annotation set from which to select
277 * @param containingAnnotation the annotation whose range must contain the
278 * selected annotations
279 * @return the AnnotationSet containing all annotations fully contained in
280 * the offset range of the containingAnnotation
281 */
282 public static AnnotationSet getContainedAnnotations(
283 AnnotationSet sourceAnnotationSet,
284 Annotation containingAnnotation) {
285 return getContainedAnnotations(sourceAnnotationSet,containingAnnotation,"");
286 }
287
288 /**
289 * Get all the annotations of type targetType
290 * from the source annotation set that lie within
291 * the range of the containing annotation.
292 *
293 * @param sourceAnnotationSet the annotation set from which to select
294 * @param containingAnnotation the annotation whose range must contain the
295 * @param targetType the type the selected annotations must have. If the
296 * empty string, no filtering on type is done.
297 * @return the AnnotationSet containing all annotations fully contained in
298 * the offset range of the containingAnnotation
299 */
300 public static AnnotationSet getContainedAnnotations(
301 AnnotationSet sourceAnnotationSet,
302 Annotation containingAnnotation,
303 String targetType) {
304 if(targetType.equals("")) {
305 return sourceAnnotationSet.getContained(
306 containingAnnotation.getStartNode().getOffset(),
307 containingAnnotation.getEndNode().getOffset());
308 } else {
309 return sourceAnnotationSet.getContained(
310 containingAnnotation.getStartNode().getOffset(),
311 containingAnnotation.getEndNode().getOffset()).get(targetType);
312 }
313 }
314
315 /**
316 * Get all the annotations from the source annotation set that lie within
317 * the range of the containing annotation set, i.e. within the offset range
318 * between the start of the first annotation in the containing set and the
319 * end of the last annotation in the annotation set. If the containing
320 * annotation set is empty, an empty set is returned.
321 *
322 * @param sourceAnnotationSet the annotation set from which to select
323 * @param containingAnnotationSet the annotation set whose range must contain
324 * the selected annotations
325 * @return the AnnotationSet containing all annotations fully contained in
326 * the offset range of the containingAnnotationSet
327 */
328 public static AnnotationSet getContainedAnnotations(
329 AnnotationSet sourceAnnotationSet,
330 AnnotationSet containingAnnotationSet) {
331 return getContainedAnnotations(sourceAnnotationSet,containingAnnotationSet,"");
332 }
333
334 /**
335 * Get all the annotations from the source annotation set with a type equal to
336 * targetType that lie within
337 * the range of the containing annotation set, i.e. within the offset range
338 * between the start of the first annotation in the containing set and the
339 * end of the last annotation in the annotation set. If the containing
340 * annotation set is empty, an empty set is returned.
341 *
342 * @param sourceAnnotationSet the annotation set from which to select
343 * @param containingAnnotationSet the annotation set whose range must contain
344 * the selected annotations
345 * @param targetType the type the selected annotations must have
346 * @return the AnnotationSet containing all annotations fully contained in
347 * the offset range of the containingAnnotationSet
348 */
349 public static AnnotationSet getContainedAnnotations(
350 AnnotationSet sourceAnnotationSet,
351 AnnotationSet containingAnnotationSet,
352 String targetType) {
353 if(containingAnnotationSet.size() == 0) {
354 return new ImmutableAnnotationSetImpl(null,null) {
355 private static final long serialVersionUID = -6703131102439043539L;
356 };
357 }
358 if(targetType.equals("")) {
359 return sourceAnnotationSet.getContained(
360 containingAnnotationSet.firstNode().getOffset(),
361 containingAnnotationSet.lastNode().getOffset());
362 } else {
363 return sourceAnnotationSet.getContained(
364 containingAnnotationSet.firstNode().getOffset(),
365 containingAnnotationSet.lastNode().getOffset()).get(targetType);
366 }
367 }
368
369
370 /**
371 * Return a List containing the annotations in the given annotation set, in
372 * document order (i.e. increasing order of start offset).
373 *
374 * @param as the annotation set
375 * @return a list containing the annotations from <code>as</code> in document
376 * order.
377 */
378 public static List<Annotation> inDocumentOrder(AnnotationSet as) {
379 List<Annotation> ret = new ArrayList<Annotation>();
380 if(as != null) {
381 ret.addAll(as);
382 Collections.sort(ret, OFFSET_COMPARATOR);
383 }
384 return ret;
385 }
386
387 /**
388 * A single instance of {@link OffsetComparator} that can be used by any code
389 * that requires one.
390 */
391 public static final OffsetComparator OFFSET_COMPARATOR =
392 new OffsetComparator();
393
394 /**
395 * Create a feature map from an array of values. The array must have an even
396 * number of items, alternating keys and values i.e. [key1, value1, key2,
397 * value2, ...].
398 *
399 * @param values an even number of items, alternating keys and values.
400 * @return a feature map containing the given items.
401 */
402 public static FeatureMap featureMap(Object... values) {
403 FeatureMap fm = Factory.newFeatureMap();
404 if(values != null) {
405 for(int i = 0; i < values.length; i++) {
406 fm.put(values[i], values[++i]);
407 }
408 }
409 return fm;
410 }
411
412 /**
413 * Create a feature map from an existing map (typically one that does not
414 * itself implement FeatureMap).
415 *
416 * @param map the map to convert.
417 * @return a new FeatureMap containing the same mappings as the source map.
418 */
419 public static FeatureMap toFeatureMap(Map map) {
420 FeatureMap fm = Factory.newFeatureMap();
421 fm.putAll(map);
422 return fm;
423 }
424
425 /**
426 * Issue a message to the log but only if the same message has not
427 * been logged already in the same GATE session.
428 * This is intended for explanations or warnings that should not be
429 * repeated every time the same situation occurs.
430 *
431 * @param logger - the logger instance to use
432 * @param level - the severity level for the message
433 * @param message - the message itself
434 */
435 public static void logOnce (Logger logger, Level level, String message) {
436 if(!alreadyLoggedMessages.contains(message)) {
437 logger.log(level, message);
438 alreadyLoggedMessages.add(message);
439 }
440 }
441
442 /**
443 * Check if a message has already been logged or shown. This does not log
444 * or show anything but only stores the message as one that has been shown
445 * already if necessary and returns if the message has been shown or not.
446 *
447 * @param message - the message that should only be logged or shown once
448 * @return - true if the message has already been logged or checked with
449 * this method.
450 *
451 */
452 public static boolean isLoggedOnce(String message) {
453 boolean isThere = alreadyLoggedMessages.contains(message);
454 if(!isThere) {
455 alreadyLoggedMessages.add(message);
456 }
457 return isThere;
458 }
459
460 private static final Set<String> alreadyLoggedMessages =
461 Collections.synchronizedSet(new HashSet<String>());
462
463 }
|