001 /*
002 * AnnotationDeletePR.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Kalina Bontcheva, 19/10/2001
013 *
014 * $Id: AnnotationDeletePR.java 12700 2010-05-28 12:38:07Z johann_p $
015 */
016
017 package gate.creole.annotdelete;
018
019 import java.util.*;
020
021 import gate.*;
022 import gate.creole.*;
023 import gate.creole.metadata.CreoleParameter;
024 import gate.creole.metadata.CreoleResource;
025 import gate.creole.metadata.Optional;
026 import gate.creole.metadata.RunTime;
027 import gate.util.GateRuntimeException;
028
029 /**
030 * This class is the implementation of a processing resource which
031 * deletes all annotations and sets other than 'original markups'.
032 * If put at the start of an application, it'll ensure that the
033 * document is restored to its clean state before being processed.
034 */
035 @CreoleResource(name = "Document Reset PR",
036 comment = "Remove named annotation sets or reset the default annotation set")
037 public class AnnotationDeletePR extends AbstractLanguageAnalyser
038 implements ProcessingResource {
039
040 public static final String
041 TRANSD_DOCUMENT_PARAMETER_NAME = "document";
042
043 public static final String
044 TRANSD_ANNOT_TYPES_PARAMETER_NAME = "annotationTypes";
045
046 public static final String
047 TRANSD_SETS_KEEP_PARAMETER_NAME = "setsToKeep";
048
049 public static final String
050 TRANSD_SETS_KEEP_ORIGIANL_MARKUPS_ANNOT_SET = "keppOriginalMarkupsAS";
051
052 protected String markupSetName = GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME;
053 protected List annotationTypes;
054 protected List setsToKeep;
055 protected List<String> setsToRemove = null;
056 protected Boolean keepOriginalMarkupsAS;
057
058 /**
059 * This parameter specifies the names of sets to remove or reset. If this
060 * list is empty or null, it will be ignored. If this list is not empty,
061 * all the other parameters of this PR are ignored. In order to include
062 * the default annotation set in this list, add a list entry that is either
063 * null or an empty String.
064 * @param setsToRemove a List of String that contains the names of
065 * annotation sets to remove.
066 */
067 @RunTime
068 @Optional
069 @CreoleParameter(
070 comment = "A list of annotation set names to reset/remove. If non-empty, ignore the parameters which specify what to keep"
071 )
072 public void setSetsToRemove(List<String> setsToRemove) {
073 this.setsToRemove = setsToRemove;
074 }
075 public List<String> getSetsToRemove() {
076 return this.setsToRemove;
077 }
078
079
080 /** Initialise this resource, and return it. */
081 public Resource init() throws ResourceInstantiationException
082 {
083 return super.init();
084 } // init()
085
086 /**
087 * Reinitialises the processing resource. After calling this method the
088 * resource should be in the state it is after calling init.
089 * If the resource depends on external resources (such as rules files) then
090 * the resource will re-read those resources. If the data used to create
091 * the resource has changed since the resource has been created then the
092 * resource will change too after calling reInit().
093 */
094 public void reInit() throws ResourceInstantiationException
095 {
096 init();
097 } // reInit()
098
099 /** Run the resource. */
100 public void execute() throws ExecutionException {
101
102 if(document == null)
103 throw new GateRuntimeException("No document to process!");
104
105
106 Map matchesMap = null;
107 Object matchesMapObject = document.getFeatures().get(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME);
108 if(matchesMapObject instanceof Map) {
109 matchesMap = (Map) matchesMapObject;
110 }
111
112 if(setsToRemove != null && !setsToRemove.isEmpty()) {
113 // just remove or empty the sets in this list and ignore
114 // everything else
115 for(String setName : setsToRemove) {
116 if(setName == null || setName.equals("")) {
117 // clear the default annotation set
118 if (annotationTypes == null || annotationTypes.isEmpty()) {
119 document.getAnnotations().clear();
120 removeFromDocumentCorefData( (String)null, matchesMap);
121 } else {
122 removeSubSet(document.getAnnotations(), matchesMap);
123 }
124 } else {
125 // remove this named set
126 if (annotationTypes == null || annotationTypes.isEmpty()) {
127 document.removeAnnotationSet(setName);
128 removeFromDocumentCorefData( (String) setName, matchesMap);
129 } else {
130 removeSubSet(document.getAnnotations(setName), matchesMap);
131 }
132 }
133 }
134 if(matchesMap != null) {
135 document.getFeatures().put(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME,
136 matchesMap);
137 }
138 } else {
139 // ignore the setsToRemove parameter and process according to
140 // the other parameters
141
142 // determine which sets to keep
143 List keepSets = new ArrayList();
144 if(setsToKeep != null) keepSets.addAll(setsToKeep);
145 if(keepOriginalMarkupsAS.booleanValue() &&
146 !keepSets.contains(markupSetName)) {
147 keepSets.add(markupSetName);
148 }
149
150 //Unless we've been asked to keep it, first clear the default set,
151 //which cannot be removed
152 if(!keepSets.contains(null) && !keepSets.contains("")) {
153 if (annotationTypes == null || annotationTypes.isEmpty()) {
154 document.getAnnotations().clear();
155 removeFromDocumentCorefData( (String)null, matchesMap);
156 } else {
157 removeSubSet(document.getAnnotations(), matchesMap);
158 }
159 }
160
161 //get the names of all sets
162 Map namedSets = document.getNamedAnnotationSets();
163 //nothing left to do if there are no named sets
164 if (namedSets != null && !namedSets.isEmpty()) {
165 //loop through the sets and delete them all unless
166 //we've been asked to keep them
167 List setNames = new ArrayList(namedSets.keySet());
168 Iterator iter = setNames.iterator();
169 String setName;
170
171 while (iter.hasNext()) {
172 setName = (String) iter.next();
173 //check first whether this is the original markups or one of the sets
174 //that we want to keep
175 if (setName != null) {
176 // skip named sets from setsToKeep
177 if(keepSets.contains(setName)) continue;
178
179 if (annotationTypes == null || annotationTypes.isEmpty()) {
180 document.removeAnnotationSet(setName);
181 removeFromDocumentCorefData( (String) setName, matchesMap);
182 } else {
183 removeSubSet(document.getAnnotations(setName), matchesMap);
184 }
185 }//if
186 }
187 }
188
189 // and finally we add it to the document
190 if(matchesMap != null) {
191 document.getFeatures().put(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME,
192 matchesMap);
193 }
194 } // if(setsToRemove != null && !setsToRemove.isEmpty())
195 } // execute()
196
197 // method to undate the Document-Coref-data
198 private void removeFromDocumentCorefData(String currentSet, Map matchesMap) {
199 if(matchesMap == null)
200 return;
201
202 // if this is defaultAnnotationSet, we cannot remove this
203 if(currentSet == null) {
204 java.util.List matches = (java.util.List) matchesMap.get(currentSet);
205 if (matches == null || matches.size() == 0) {
206 // do nothing
207 return;
208 }
209 else {
210 matchesMap.put(currentSet, new java.util.ArrayList());
211 }
212 } else {
213 // we remove this set from the Coref Data
214 matchesMap.remove(currentSet);
215 }
216 }
217
218 // method to update the Document-Coref-data
219 private void removeAnnotationsFromCorefData(AnnotationSet annotations, String setName, Map matchesMap) {
220 if(matchesMap == null) {
221 return;
222 }
223
224 java.util.List matches = (java.util.List) matchesMap.get(setName);
225 if(matches == null)
226 return;
227
228 // each element in the matches is a group of annotation IDs
229 // so for each annotation we will have to traverse through all the lists and
230 // find out the annotation and remove it
231 ArrayList<Annotation> annots = new ArrayList<Annotation>(annotations);
232 for(int i=0; i<annots.size(); i++) {
233 Annotation toRemove = annots.get(i);
234 Iterator idIters = matches.iterator();
235 ArrayList ids = new ArrayList();
236 while(idIters.hasNext()) {
237 ids = (ArrayList) idIters.next();
238 if(ids.remove(toRemove.getId())) {
239 // yes removed
240 break;
241 }
242 }
243 if(ids.size()==0) {
244 matches.remove(ids);
245 }
246 }
247 // and finally see if there is any group available
248 if(matches.size()==0) {
249 matchesMap.remove(setName);
250 }
251 }
252
253 /* End */
254
255 private void removeSubSet(AnnotationSet theSet, Map matchMap) {
256 AnnotationSet toRemove = theSet.get(new HashSet(annotationTypes));
257 if (toRemove == null || toRemove.isEmpty())
258 return;
259 theSet.removeAll(toRemove);
260 removeAnnotationsFromCorefData(toRemove, theSet.getName(), matchMap);
261 }//removeSubSet
262
263 public void setMarkupASName(String newMarkupASName) {
264 markupSetName = newMarkupASName;
265 }
266
267 public String getMarkupASName() {
268 return markupSetName;
269 }
270
271 public List getAnnotationTypes() {
272 return this.annotationTypes;
273 }
274
275 public void setAnnotationTypes(List newTypes) {
276 annotationTypes = newTypes;
277 }
278
279 public List getSetsToKeep() {
280 return this.setsToKeep;
281 }
282
283 public void setSetsToKeep(List newSetNames) {
284 //we need to modify this list sometimes, so to make sure it's not some
285 //unmodifiable version, we'll create our own
286 setsToKeep = newSetNames != null ?
287 new ArrayList(newSetNames):
288 new ArrayList();
289 }
290
291 public Boolean getKeepOriginalMarkupsAS() {
292 return keepOriginalMarkupsAS;
293 }
294
295 public void setKeepOriginalMarkupsAS(Boolean emptyDefaultAnnotationSet) {
296 this.keepOriginalMarkupsAS = emptyDefaultAnnotationSet;
297 }
298
299
300 } // class AnnotationSetTransfer
|