001 /*
002 * AnnotationSet.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Hamish Cunningham, 7/Feb/2000
013 *
014 * $Id: AnnotationSet.java 12307 2010-02-25 14:09:38Z ian_roberts $
015 */
016
017 package gate;
018
019 import gate.event.AnnotationSetListener;
020 import gate.event.GateListener;
021 import gate.util.InvalidOffsetException;
022
023 import java.io.Serializable;
024 import java.util.Set;
025
026 /**
027 * <p>
028 * A set of annotations on a document. In addition to the methods
029 * provided by {@link SimpleAnnotationSet}, Annotation sets support
030 * access to subsets of the annotations in the set by various more
031 * complex criteria. Annotation sets are attached to documents - they
032 * cannot be constructed directly, but are obtained via the
033 * <code>getAnnotations</code> methods of {@link Document}.
034 * </p>
035 *
036 * <p>
037 * This interface provides methods to extract subsets of annotations
038 * from the current set given various constraints. Note that the
039 * annotation sets returned by these <code>get</code> methods are
040 * immutable snapshots of the set as it was at the time the method was
041 * called. Subsequent changes to the underlying set are not reflected in
042 * the subset view.
043 * </p>
044 *
045 * <p>
046 * This interface extends {@link java.util.Set}<Annotation>, so
047 * can be used anywhere a Java Collections Framework <code>Set</code>
048 * or <code>Collection</code> is required.
049 * </p>
050 */
051 public interface AnnotationSet extends SimpleAnnotationSet, Serializable {
052 /**
053 * Create and add an annotation with a pre-existing ID. This method
054 * should only be used when you have existing annotations with unique
055 * IDs, for example when reading the full contents of an annotation
056 * set from some saved representation. In normal use you should use
057 * the method
058 * {@link SimpleAnnotationSet#add(Long, Long, String, FeatureMap)},
059 * which allows the set to assign a unique ID.
060 *
061 * @param id the ID for the new annotation
062 * @param start the start offset for the new annotation
063 * @param end the end offset for the new annotation
064 * @param type the annotation type
065 * @param features the features for the new annotation
066 * @return the newly generated annotation ID, which will be distinct
067 * from all other annotations in this set.
068 * @throws InvalidOffsetException if the start or end offsets are
069 * <code>null</code>, or if the start offset is less than
070 * 0 or the end offset is greater than the length of the
071 * document.
072 */
073 public void add(Integer id, Long start, Long end, String type,
074 FeatureMap features) throws InvalidOffsetException;
075
076 /**
077 * <p>
078 * Select annotations by type and feature values. This will return an
079 * annotation set containing just those annotations of a particular
080 * type which have features with specific names and values. (It will
081 * also return annotations that have features besides those specified,
082 * but it will not return any annotations that do not have all the
083 * specified feature-value pairs.)
084 * </p>
085 *
086 * <p>
087 * However, if constraints contains a feature whose value is equal to
088 * {@link gate.creole.ANNIEConstants#LOOKUP_CLASS_FEATURE_NAME} (which
089 * is normally "class"), then GATE will attempt to match that feature
090 * using an ontology which it will try to retreive from a feature
091 * {@link gate.creole.ANNIEConstants#LOOKUP_ONTOLOGY_FEATURE_NAME} on
092 * both the annotation and in <code>constraints</code>. If these do
093 * not return identical ontologies, or if either the annotation or
094 * constraints does not contain an ontology, then matching will fail,
095 * and the annotation will not be added. In summary, this method will
096 * not work normally for features with the name "class".
097 * </p>
098 *
099 * @param type The type of the annotations to return.
100 * @param constraints A feature map containing all of the feature
101 * value pairs that the annotation must have in order for
102 * them to be returned.
103 * @return An annotation set containing only those annotations with
104 * the given name and which have the specified set of
105 * feature-value pairs. If no annotations match the
106 * constraints, an empty set is returned. The returned set is
107 * immutable.
108 */
109 public AnnotationSet get(String type, FeatureMap constraints);
110
111 /**
112 * Select annotations by type and feature names It returns all
113 * annotations of the given type that have the given set of features,
114 * regardless of their concrete values If the type == null, then
115 * select regardless of type
116 *
117 * @param type the annotation type to return. If <code>null</code>
118 * then all annotation types are searched.
119 * @param featureNames the feature names which an annotation must have
120 * to be matched.
121 * @return An annotation set containing only those annotations with
122 * the given type and at least the given features. If no
123 * annotations match these constraints, an empty set is
124 * returned. The returned set is immutable.
125 */
126 public AnnotationSet get(String type, Set featureNames);
127
128 /**
129 * Select annotations by type, features and offset. This method is a
130 * combination of {@link #get(Long)} and
131 * {@link #get(String, FeatureMap)}, in that it matches annotations
132 * by type and feature constraints but considers only those
133 * annotations that start as close as possible to the right of the
134 * given offset.
135 *
136 * @param type the annotation type to search for
137 * @param constraints the set of features an annotation must have to
138 * be matched
139 * @param offset the offset at which to anchor the search.
140 * @return An annotation set containing those annotations that match
141 * the constraints, or an empty set if there are no such
142 * annotations. The returned set is immutable.
143 */
144 public AnnotationSet get(String type, FeatureMap constraints, Long offset);
145
146 /**
147 * Select annotations by offset. This returns the set of annotations
148 * whose start node is the least such that it is greater than or equal
149 * to <code>offset</code>. In other words it finds the first
150 * annotation that starts at or after the given offset and returns all
151 * annotations which start at the same place.
152 *
153 * @param offset the offset at which to start the search.
154 * @return a set of annotations, all of which start at the same offset
155 * >= <code>offset</code>. The returned set is
156 * immutable.
157 */
158 public AnnotationSet get(Long offset);
159
160 /**
161 * Select annotations by offset. This returns the set of annotations
162 * that overlap totaly or partially the interval defined by the two
163 * provided offsets, i.e. that start strictly before
164 * <code>endOffset</code> and end strictly after
165 * <code>startOffset</code>.
166 *
167 * @param startOffset the start of the interval
168 * @param endOffset the end of the interval
169 * @return the set of annotations that overlap the given interval, or
170 * an empty set if there are no such annotations. The returned
171 * set is immutable.
172 */
173 public AnnotationSet get(Long startOffset, Long endOffset);
174
175 /**
176 * Select annotations by offset and type. This returns the set of
177 * annotations that overlap totaly or partially the interval defined
178 * by the two provided offsets and are of the given type. This method
179 * is effectively a combination of {@link #get(Long, Long)} and
180 * {@link SimpleAnnotationSet#get(String)} but may admit more
181 * efficient implementation.
182 *
183 * @param type the annotation type to search for
184 * @param startOffset the start of the interval
185 * @param endOffset the end of the interval
186 * @return the set of annotations of the given type that overlap the
187 * given interval, or an empty set if no such annotations
188 * exist. The returned set is immutable.
189 */
190 public AnnotationSet get(String type, Long startOffset, Long endOffset);
191
192 /**
193 * Select annotations of the given type that complete span the range.
194 * Formally, for any annotation a, a will be included in the return
195 * set if:
196 * <ul>
197 * <li>a.getStartNode().getOffset() <= startOffset</li>
198 * <li>and</li>
199 * <li>a.getEndNode().getOffset() >= endOffset</li>
200 *
201 * @param neededType Type of annotation to return. If empty, all
202 * annotation types will be returned.
203 * @param startOffset the start of the interval
204 * @param endOffset the end of the interval
205 * @return the set of annotations matching the parameters, or an empty
206 * set if no such annotations exist. The returned set is
207 * immutable.
208 */
209 public AnnotationSet getCovering(String neededType, Long startOffset, Long endOffset);
210
211 /**
212 * Select annotations by offset. This returns the set of annotations
213 * that are contained in the interval defined by the two provided
214 * offsets. The difference with get(startOffset, endOffset) is that
215 * the latter also provides annotations that have a span which covers
216 * completely and is bigger than the given one. Here we only get the
217 * annotations between the two offsets. Formally, all annotations
218 * are returned whose start position is >= <code>startOffset</code>
219 * and < <code>endOffset</code> and whose end position is
220 * <= <code>endOffset</code>.
221 *
222 * @param startOffset the start of the interval, inclusive
223 * @param endOffset the end of the interval, inclusive
224 * @return the set of annotations from this set contained completely
225 * inside the interval, or an empty set if no such annotations
226 * exist. The returned set is immutable.
227 */
228 public AnnotationSet getContained(Long startOffset, Long endOffset);
229
230 /**
231 * Get the node with the smallest offset
232 */
233 public Node firstNode();
234
235 /**
236 * Get the node with the largest offset
237 */
238 public Node lastNode();
239
240 /**
241 * Get the first node that is relevant for this annotation set and
242 * which has the offset larger than the one of the node provided.
243 */
244 public Node nextNode(Node node);
245
246 public void addAnnotationSetListener(AnnotationSetListener l);
247
248 public void removeAnnotationSetListener(AnnotationSetListener l);
249
250 public void addGateListener(GateListener l);
251
252 public void removeGateListener(GateListener l);
253
254 } // interface AnnotationSet
|