001 /*
002 * AnnotationSetTransfer.java
003 *
004 * Copyright (c) 2009, The University of Sheffield.
005 *
006 * This file is part of GATE (see http://gate.ac.uk/), and is free software,
007 * licenced under the GNU Library General Public License, Version 2, June 1991
008 * (in the distribution as file licence.html, and also available at
009 * http://gate.ac.uk/gate/licence.html).
010 *
011 * Mark A. Greenwood, 7/10/2009
012 */
013 package gate.creole.annotransfer;
014
015 import gate.Annotation;
016 import gate.AnnotationSet;
017 import gate.Factory;
018 import gate.FeatureMap;
019 import gate.GateConstants;
020 import gate.ProcessingResource;
021 import gate.Resource;
022 import gate.creole.AbstractLanguageAnalyser;
023 import gate.creole.ExecutionException;
024 import gate.creole.ResourceInstantiationException;
025 import gate.util.BomStrippingInputStreamReader;
026 import gate.util.InvalidOffsetException;
027
028 import java.io.BufferedReader;
029 import java.io.IOException;
030 import java.io.Serializable;
031 import java.net.URL;
032 import java.util.ArrayList;
033 import java.util.HashMap;
034 import java.util.Iterator;
035 import java.util.List;
036 import java.util.Map;
037
038 /**
039 * This plugin allows the names of annotations and features to be
040 * changed as well as transfered from one annotation set to another.
041 * Think of it as an extended version of the old AnnotationSet Transfer
042 * plugin.
043 *
044 * @author Mark A. Greenwood
045 */
046 public class AnnotationSetTransfer extends AbstractLanguageAnalyser
047 implements
048 ProcessingResource,
049 Serializable {
050
051 private String tagASName = GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME;
052
053 private String outputASName, inputASName, textTagName;
054
055 private URL configURL;
056
057 private Boolean copyAnnotations, transferAllUnlessFound;
058
059 private gate.AnnotationSet bodyAnnotations = null;
060
061 private List<String> annotationTypes = null;
062
063 Map<String, Mapping> mappings = new HashMap<String, Mapping>();
064
065 @Override
066 public Resource init() throws ResourceInstantiationException {
067 return this;
068 }
069
070 @Override
071 public void execute() throws ExecutionException {
072 AnnotationSet inputAS = document.getAnnotations(inputASName);
073 AnnotationSet outputAS = document.getAnnotations(outputASName);
074 AnnotationSet tagAS = document.getAnnotations(tagASName);
075 AnnotationSet annotsToTransfer = null;
076
077 boolean newID = copyAnnotations && inputAS.equals(outputAS);
078
079 mappings.clear();
080
081 // TODO clean this up so we don't have to repeat ourselves
082 if(configURL != null) {
083
084 try {
085 BufferedReader in = new BomStrippingInputStreamReader(configURL
086 .openStream());
087
088 String line = in.readLine();
089 while(line != null) {
090 if(!line.trim().equals("")) {
091 String[] data = line.split("=", 2);
092 String oldName = data[0].trim();
093 String newName = data.length == 2 ? data[1].trim() : null;
094 mappings.put(oldName, new Mapping(oldName, newName));
095 }
096 line = in.readLine();
097 }
098 }
099 catch(IOException ioe) {
100 ioe.printStackTrace();
101 }
102 }
103 else if(annotationTypes != null) {
104 for(String type : annotationTypes) {
105 String[] data = type.split("=", 2);
106 String oldName = data[0].trim();
107 String newName = data.length == 2 ? data[1].trim() : null;
108
109 mappings.put(oldName, new Mapping(oldName, newName));
110 }
111 }
112 // else
113 // throw new
114 // ExecutionException("The annotation list and URL cannot both be null");
115
116 if(mappings.size() > 0) {
117 annotsToTransfer = inputAS.get(mappings.keySet());
118 }
119 else {
120 // transfer everything
121 annotsToTransfer = inputAS.get();
122 }
123 // in case of no one annotation from some of annotationTypes
124 if(annotsToTransfer == null || annotsToTransfer.size() == 0) return;
125 // check if we have a BODY annotation
126 // if not, just copy all
127 if(textTagName == null || textTagName.equals("")) {
128 // remove from input set unless we copy only
129 if(!copyAnnotations) inputAS.removeAll(annotsToTransfer);
130 transferAnnotations(new ArrayList<Annotation>(annotsToTransfer),
131 outputAS, newID);
132
133 return;
134 }
135 // get the BODY annotation
136 bodyAnnotations = tagAS.get(textTagName);
137 if(bodyAnnotations == null || bodyAnnotations.isEmpty()) {
138 // outputAS.addAll(inputAS);
139 if(transferAllUnlessFound) {
140 // remove from input set unless we copy only
141 if(!copyAnnotations) inputAS.removeAll(annotsToTransfer);
142 transferAnnotations(new ArrayList<Annotation>(annotsToTransfer),
143 outputAS, newID);
144 }
145 return;
146 }
147 List<Annotation> annots2Move = new ArrayList<Annotation>();
148 Iterator<Annotation> bodyIter = bodyAnnotations.iterator();
149 while(bodyIter.hasNext()) {
150 Annotation bodyAnn = bodyIter.next();
151 Long start = bodyAnn.getStartNode().getOffset();
152 Long end = bodyAnn.getEndNode().getOffset();
153 // get all annotations we want transferred
154 AnnotationSet annots2Copy = annotsToTransfer.getContained(start, end);
155 // copy them to the new set and delete them from the old one
156 annots2Move.addAll(annots2Copy);
157 }
158 if(!copyAnnotations) inputAS.removeAll(annots2Move);
159 transferAnnotations(annots2Move, outputAS, newID);
160 }
161
162 private void transferAnnotations(List<Annotation> toTransfer,
163 AnnotationSet to, boolean newID) throws ExecutionException {
164 for(Annotation annot : toTransfer) {
165 Mapping m = mappings.get(annot.getType());
166
167 String name = (m == null || m.newName == null
168 ? annot.getType()
169 : m.newName);
170
171 try {
172 FeatureMap params = Factory.newFeatureMap();
173 params.putAll(annot.getFeatures());
174 if(newID) {
175 to.add(annot.getStartNode().getOffset(), annot.getEndNode()
176 .getOffset(), name, params);
177 }
178 else {
179 to.add(annot.getId(), annot.getStartNode().getOffset(), annot
180 .getEndNode().getOffset(), name, params);
181 }
182 }
183 catch(InvalidOffsetException e) {
184 throw new ExecutionException(e);
185 }
186 }
187 }
188
189 public void setTagASName(String newTagASName) {
190 // if given an empty string, set to the default set
191 if("".equals(newTagASName))
192 tagASName = null;
193 else tagASName = newTagASName;
194 }
195
196 public String getTagASName() {
197 return tagASName;
198 }
199
200 public void setInputASName(String newInputASName) {
201 inputASName = newInputASName;
202 }
203
204 public String getInputASName() {
205 return inputASName;
206 }
207
208 public void setOutputASName(String newOutputASName) {
209 outputASName = newOutputASName;
210 }
211
212 public String getOutputASName() {
213 return outputASName;
214 }
215
216 public void setTextTagName(String newTextTagName) {
217 textTagName = newTextTagName;
218 }
219
220 public String getTextTagName() {
221 return textTagName;
222 }
223
224 public List<String> getAnnotationTypes() {
225 return annotationTypes;
226 }
227
228 public void setAnnotationTypes(List<String> newTypes) {
229 annotationTypes = newTypes;
230 }
231
232 public void setConfigURL(URL url) {
233 configURL = url;
234 }
235
236 public URL getConfigURL() {
237 return configURL;
238 }
239
240 public Boolean getCopyAnnotations() {
241 return this.copyAnnotations;
242 }
243
244 public void setCopyAnnotations(Boolean copyAnnotations) {
245 this.copyAnnotations = copyAnnotations;
246 }
247
248 public Boolean getTransferAllUnlessFound() {
249 return this.transferAllUnlessFound;
250 }
251
252 public void setTransferAllUnlessFound(Boolean value) {
253 this.transferAllUnlessFound = value;
254 }
255
256 class Mapping {
257 String oldName, newName;
258
259 // TODO implement the renaming of features as well as annotations
260 // Map<String, String> features = new HashMap<String, String>();
261
262 public Mapping(String oldName, String newName) {
263 this.oldName = oldName;
264 this.newName = newName;
265 }
266
267 @Override
268 public String toString() {
269 StringBuilder result = new StringBuilder();
270 result.append(oldName);
271 if(newName != null) {
272 result.append("=").append(newName);
273 }
274 return result.toString();
275 }
276 }
277 }
|