001 /**
002 *
003 * Copyright (c) 1995-2010, The University of Sheffield. See the file
004 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
005 *
006 * This file is part of GATE (see http://gate.ac.uk/), and is free
007 * software, licenced under the GNU Library General Public License,
008 * Version 2, June 1991 (in the distribution as file licence.html,
009 * and also available at http://gate.ac.uk/gate/licence.html).
010 *
011 * $Id: IaaCalculation.java 9050 2007-09-04 10:42:12Z yaoyongli $
012 */
013
014 package gate.util;
015
016 import gate.Annotation;
017 import gate.AnnotationSet;
018 import java.util.HashMap;
019 import java.util.HashSet;
020 import java.util.Vector;
021 /**
022 * Merging the annotations from different annotators. The input
023 * is the array containing the annotation set for merging. The
024 * output is a map, the key of which is the merged annotations
025 * and the values of which represent those annotators who agree
026 * on the merged annotation. Two merging methods are implemented.
027 * One method selects one annotation if at least a pre-defined
028 * number of annotators agree on it. If there are more than
029 * one merged annotations with the same span, the program selects only
030 * one annotation from them with the maximal number of annotators
031 * on it. Another method selects only one
032 * annotation from those annotations with the same span,
033 * which majority of the annotators support.
034 */
035 public class AnnotationMerging {
036
037 /**
038 * Merge all annotationset from an array. If one annotation is in at least
039 * numK annotation sets, then put it into the merging annotation set.
040 */
041 public static void mergeAnnotation(AnnotationSet[] annsArr, String nameFeat,
042 HashMap<Annotation, String> mergeAnns, int numMinK, boolean isTheSameInstances) {
043 int numA = annsArr.length;
044 // First copy the annotatioin sets into a temp array
045 HashSet<Annotation>[] annsArrTemp = new HashSet[numA];
046 for(int i = 0; i < numA; ++i) {
047 if(annsArr[i] != null) {
048 annsArrTemp[i] = new HashSet<Annotation>();
049 for(Annotation ann : annsArr[i])
050 annsArrTemp[i].add(ann);
051 }
052 }
053 HashSet<String> featSet = new HashSet<String>();
054 if(nameFeat != null) featSet.add(nameFeat);
055 if(numMinK<1) numMinK=1;
056 for(int iA = 0; iA < numA - numMinK + 1; ++iA) {
057 if(annsArrTemp[iA] != null) {
058 for(Annotation ann : annsArrTemp[iA]) {
059 int numContained = 1;
060 StringBuffer featAdd = new StringBuffer();
061 featAdd.append(iA);
062 StringBuffer featDisa = new StringBuffer();
063 if(iA>0) {
064 featDisa.append("0");
065 for(int i=1; i<iA; ++i)
066 featDisa.append("-"+i);
067 }
068 int numDisagreed = iA;
069 for(int i = iA + 1; i < numA; ++i) {
070 boolean isContained = false;
071 if(annsArrTemp[i] != null) {
072 Annotation annT = null;
073 for(Annotation ann0 : annsArrTemp[i]) {
074 if(ann0.isCompatible(ann, featSet)) {
075 ++numContained;
076 featAdd.append("-" + i);
077 annT = ann0;
078 isContained = true;
079 break;
080 }
081 }
082 if(isContained)
083 annsArrTemp[i].remove(annT);
084 }
085 if(!isContained){
086 if(numDisagreed==0)
087 featDisa.append(i);
088 else featDisa.append("-"+i);
089 ++numDisagreed;
090 }
091 }
092 if(numContained >= numMinK) {
093 mergeAnns.put(ann, featAdd.toString());
094 } else if(isTheSameInstances && nameFeat != null) {
095 ann.getFeatures().remove(nameFeat);
096 mergeAnns.put(ann, featAdd.toString());
097 }
098 }
099 }
100 }
101 //Remove the annotation in the same place
102 removeDuplicate(mergeAnns);
103 return;
104 }
105 /**
106 * Merge all annotationset from an array. If one annotation is agreed by
107 * the majority of the annotators, then put it into the merging annotation set.
108 */
109 public static void mergeAnnotationMajority(AnnotationSet[] annsArr, String nameFeat,
110 HashMap<Annotation, String> mergeAnns, boolean isTheSameInstances) {
111 int numA = annsArr.length;
112 if(nameFeat == null) {
113 mergeAnnogationMajorityNoFeat(annsArr, mergeAnns, isTheSameInstances);
114 return;
115 }
116
117 // First copy the annotatioin sets into a temp array
118 HashSet<Annotation>[] annsArrTemp = new HashSet[numA];
119 for(int i = 0; i < numA; ++i) {
120 if(annsArr[i] != null) {
121 annsArrTemp[i] = new HashSet<Annotation>();
122 for(Annotation ann : annsArr[i])
123 annsArrTemp[i].add(ann);
124 }
125 }
126 for(int iA = 0; iA < numA; ++iA) {
127 if(annsArrTemp[iA] != null) {
128 for(Annotation ann : annsArrTemp[iA]) {
129 int numDisagreed=0;
130 //Already the iA annotators don't agree the annotation
131 numDisagreed = iA;
132 StringBuffer featDisa = new StringBuffer();
133 if(iA>0) {
134 featDisa.append("0");
135 for(int i=1; i<iA; ++i)
136 featDisa.append("-"+i);
137 }
138 HashMap<String,String>featOthers = new HashMap<String,String>();
139 String featTh = null;
140 if(ann.getFeatures().get(nameFeat)!= null)
141 featTh = ann.getFeatures().get(nameFeat).toString();
142
143 featOthers.put(featTh, new Integer(iA).toString());
144 HashMap<String,Annotation>annAll = new HashMap<String,Annotation>();
145 annAll.put(featTh, ann);
146 for(int i = iA + 1; i < numA; ++i) {
147 boolean isContained = false;
148 if(annsArrTemp[i] != null) {
149 Annotation annT = null;
150 for(Annotation ann0 : annsArrTemp[i]) {
151 if(ann0.coextensive(ann)) {
152 String featValue = null;
153 if(ann0.getFeatures().get(nameFeat)!=null)
154 featValue = ann0.getFeatures().get(nameFeat).toString();
155 if(!featOthers.containsKey(featValue)) {
156 featOthers.put(featValue, new Integer(i).toString());
157 annAll.put(featValue, ann0);
158 }
159 else {
160 String str = featOthers.get(featValue);
161 featOthers.put(featValue, str+"-"+i);
162 }
163 annT = ann0;
164 isContained = true;
165 break;
166 }
167 }
168 if(isContained)
169 annsArrTemp[i].remove(annT);
170 }
171 if(!isContained) {
172 if(numDisagreed==0)
173 featDisa.append(i);
174 else featDisa.append("-"+i);
175 ++numDisagreed;
176 }
177 }//end of the loop for the following annotation set
178 int numAgreed = -1;
179 String agreeFeat = null;
180 for(String str:featOthers.keySet()) {
181 String str0 = featOthers.get(str);
182 int num=1;
183 while(str0.contains("-")) {
184 ++num;
185 str0 = str0.substring(str0.indexOf('-')+1);
186 }
187 if(numAgreed<num) {
188 numAgreed = num;
189 agreeFeat = str;
190 }
191 }
192 if(numAgreed >= numDisagreed) {
193 mergeAnns.put(annAll.get(agreeFeat), featOthers.get(agreeFeat));
194 } else if(isTheSameInstances) {
195 if(ann.getFeatures().get(nameFeat)!= null)
196 ann.getFeatures().remove(nameFeat);
197 mergeAnns.put(ann, featDisa.toString());
198 }
199 } //for each ann in the current annotation set
200 }
201 }
202 return;
203 }
204 /** The majority merging method for the annotaiton not specifying any annotation
205 * feature for label.
206 * */
207 private static void mergeAnnogationMajorityNoFeat(AnnotationSet[] annsArr,
208 HashMap<Annotation, String> mergeAnns, boolean isTheSameInstances) {
209 int numA = annsArr.length;
210 // First copy the annotatioin sets into a temp array
211 HashSet<Annotation>[] annsArrTemp = new HashSet[numA];
212 for(int i = 0; i < numA; ++i) {
213 if(annsArr[i] != null) {
214 annsArrTemp[i] = new HashSet<Annotation>();
215 for(Annotation ann : annsArr[i])
216 annsArrTemp[i].add(ann);
217 }
218 }
219 for(int iA = 0; iA < numA; ++iA) {
220 if(annsArrTemp[iA] != null) {
221 for(Annotation ann : annsArrTemp[iA]) {
222 int numDisagreed=0;
223 //Already the iA annotators don't agree the annotation
224 numDisagreed = iA;
225 StringBuffer featDisa = new StringBuffer();
226 if(iA>0) {
227 featDisa.append("0");
228 for(int i=1; i<iA; ++i)
229 featDisa.append("-"+i);
230 }
231 int numAgreed=1;
232 StringBuffer featAdd = new StringBuffer();
233 featAdd.append(iA);
234 for(int i = iA + 1; i < numA; ++i) {
235 boolean isContained = false;
236 if(annsArrTemp[i] != null) {
237 Annotation annT = null;
238 for(Annotation ann0 : annsArrTemp[i]) {
239 if(ann0.coextensive(ann)) {
240 ++numAgreed;
241 annT = ann0;
242 isContained = true;
243 featAdd.append("-"+i);
244 break;
245 }
246 }
247 if(isContained)
248 annsArrTemp[i].remove(annT);
249 }
250 if(!isContained) {
251 if(numDisagreed==0)
252 featDisa.append(i);
253 else featDisa.append("-"+i);
254 ++numDisagreed;
255 }
256 }//end of the loop for the following annotation set
257 if(numAgreed >= numDisagreed) {
258 mergeAnns.put(ann, featAdd.toString());
259 } else if(isTheSameInstances) {
260 mergeAnns.put(ann, featAdd.toString());
261 }
262 } //for each ann in the current annotation set
263 }
264 }
265 return;
266 }
267 /** Remove the duplicate annotations from the merged annotations. */
268 private static void removeDuplicate(HashMap<Annotation, String> mergeAnns) {
269 // first copy the annotations into a tempory
270 HashMap <Annotation, Integer> mergeAnnsNum = new HashMap<Annotation, Integer>();
271 for(Annotation ann:mergeAnns.keySet()) {
272 String str = mergeAnns.get(ann);
273 int num=1;
274 while(str.contains("-")) {
275 ++num;
276 str = str.substring(str.indexOf('-')+1);
277 }
278 mergeAnnsNum.put(ann, new Integer(num));
279 }
280 //remove the annotaitons having the same places
281 for(Annotation ann:mergeAnnsNum.keySet()) {
282 Annotation annT=null;
283 int num0=-1;
284 Vector<Annotation>sameAnns= new Vector<Annotation>();
285 for(Annotation ann1:mergeAnnsNum.keySet()) {
286 if(ann.coextensive(ann1)) {
287 sameAnns.add(ann1);
288 int num = mergeAnnsNum.get(ann1).intValue();
289 if(num>num0) {
290 annT = ann1;
291 num0 = num;
292 }
293 }
294 } //end the inner loop for merged annotations
295 //Keep the one which most annotators agree on.
296 sameAnns.remove(annT);
297 //Remove all others
298 for(int i=0; i<sameAnns.size(); ++i)
299 mergeAnns.remove(sameAnns.elementAt(i));
300 }
301 }
302
303
304 /**
305 * Check if the annotation sets contain the same annotations.
306 */
307 public static boolean isSameInstancesForAnnotators(AnnotationSet[] annsA, int vsy) {
308 int numAnnotators = annsA.length;
309 if(annsA[0] == null) return false;
310 for(Annotation ann : annsA[0]) {
311 for(int iJud = 1; iJud < numAnnotators; ++iJud) {
312 if(annsA[iJud] == null) return false;
313 boolean isContained = false;
314 for(Annotation ann1 : annsA[iJud]) {
315 // If the ann is not the same
316 if(ann.coextensive(ann1)) {
317 isContained = true;
318 break;
319 }
320 }
321 if(!isContained) {
322 if(vsy>0)
323 System.out.println("The " + iJud + " annotator cause different");
324 return false;
325 }
326 }// end of the loop for annotators
327 }// end of loop for each annotation in one document
328 // If the annotated instances are the same for every annotators.
329 return true;
330 }
331 }
|