001 /*
002 * Copyright (c) 1995-2010, The University of Sheffield. See the file
003 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
004 *
005 * This file is part of GATE (see http://gate.ac.uk/), and is free
006 * software, licenced under the GNU Library General Public License,
007 * Version 2, June 1991 (in the distribution as file licence.html,
008 * and also available at http://gate.ac.uk/gate/licence.html).
009 *
010 * Valentin Tablan 28/01/2003
011 *
012 * $Id: AnnotationDiffer.java 12798 2010-07-01 12:16:27Z thomas_heitz $
013 *
014 */
015 package gate.util;
016
017 import java.text.NumberFormat;
018 import java.util.*;
019
020 import gate.Annotation;
021
/**
 * This class provides the logic used by the Annotation Diff tool. It starts
 * with two collections of annotation objects, one of key annotations
 * (representing the gold standard) and one of response annotations
 * (representing the system's responses). It will then pair the keys and
 * responses in a way that maximises the score. Each key - response pair gets a
 * score of {@code CORRECT_VALUE} (3), {@code PARTIALLY_CORRECT_VALUE} (2),
 * {@code MISMATCH_VALUE} (1) or {@code WRONG_VALUE} (0), depending on whether
 * the two annotations match, partially match (they only overlap), are
 * coextensive without matching, or overlap without being compatible at all.
 * Each pairing also has a type of {@link #CORRECT_TYPE},
 * {@link #PARTIALLY_CORRECT_TYPE}, {@link #SPURIOUS_TYPE},
 * {@link #MISSING_TYPE} or {@link #MISMATCH_TYPE}, further detailing the type
 * of error for the wrong matches (<i>missing</i> being the keys that weren't
 * matched to a response, while <i>spurious</i> are the responses that were
 * over-generated and do not match any key).
 *
 * Precision, recall and f-measure are also calculated.
 */
039 public class AnnotationDiffer {
040
041 /**
042 * Constructor to be used when you have a collection of AnnotationDiffer
043 * and want to consider it as only one AnnotationDiffer.
044 * Then you can only use the methods getPrecision/Recall/FMeasure...().
045 * @param differs collection to be regrouped in one AnnotationDiffer
046 */
047 public AnnotationDiffer(Collection<AnnotationDiffer> differs) {
048 correctMatches = 0;
049 partiallyCorrectMatches = 0;
050 missing = 0;
051 spurious = 0;
052 int keyCount = 0;
053 int responseCount = 0;
054 for (AnnotationDiffer differ : differs) {
055 // set the correct, partial, spurious and missing values to be
056 // the sum of those in the collection
057 correctMatches += differ.getCorrectMatches();
058 partiallyCorrectMatches += differ.getPartiallyCorrectMatches();
059 missing += differ.getMissing();
060 spurious += differ.getSpurious();
061 keyCount += differ.getKeysCount();
062 responseCount += differ.getResponsesCount();
063 }
064 keyList = new ArrayList(Collections.nCopies(keyCount, null));
065 responseList = new ArrayList(Collections.nCopies(responseCount, null));
066 }
067
/**
 * Creates a differ without initialising any of its data structures.
 */
public AnnotationDiffer() {
  super();
}
071
072 /**
073 * Interface representing a pairing between a key annotation and a response
074 * one.
075 */
076 public static interface Pairing{
077 /**
078 * Gets the key annotation of the pairing. Can be <tt>null</tt> (for
079 * spurious matches).
080 * @return an {@link Annotation} object.
081 */
082 public Annotation getKey();
083
084 /**
085 * Gets the response annotation of the pairing. Can be <tt>null</tt> (for
086 * missing matches).
087 * @return an {@link Annotation} object.
088 */
089 public Annotation getResponse();
090
091 /**
092 * Gets the type of the pairing, one of {@link #CORRECT_TYPE},
093 * {@link #PARTIALLY_CORRECT_TYPE}, {@link #SPURIOUS_TYPE} or
094 * {@link #MISSING_TYPE},
095 * @return an <tt>int</tt> value.
096 */
097 public int getType();
098 }
099
100 /**
101 * Computes a diff between two collections of annotations.
102 * @param key the collection of key annotations.
103 * @param response the collection of response annotations.
104 * @return a list of {@link Pairing} objects representing the pairing set
105 * that results in the best score.
106 */
107 public List calculateDiff(Collection key, Collection response){
108
109 //initialise data structures
110 if(key == null || key.size() == 0)
111 keyList = new ArrayList();
112 else
113 keyList = new ArrayList(key);
114
115 if(response == null || response.size() == 0)
116 responseList = new ArrayList();
117 else
118 responseList = new ArrayList(response);
119
120 if(correctAnnotations != null) correctAnnotations.clear();
121 if(partiallyCorrectAnnotations != null) partiallyCorrectAnnotations.clear();
122 if(missingAnnotations != null) missingAnnotations.clear();
123 if(spuriousAnnotations != null) spuriousAnnotations.clear();
124
125 keyChoices = new ArrayList(keyList.size());
126 keyChoices.addAll(Collections.nCopies(keyList.size(), null));
127 responseChoices = new ArrayList(responseList.size());
128 responseChoices.addAll(Collections.nCopies(responseList.size(), null));
129
130 possibleChoices = new ArrayList();
131
132 //1) try all possible pairings
133 for(int i = 0; i < keyList.size(); i++){
134 for(int j =0; j < responseList.size(); j++){
135 Annotation keyAnn = (Annotation)keyList.get(i);
136 Annotation resAnn = (Annotation)responseList.get(j);
137 PairingImpl choice = null;
138 if(keyAnn.coextensive(resAnn)){
139 //we have full overlap -> CORRECT or WRONG
140 if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
141 //we have a full match
142 choice = new PairingImpl(i, j, CORRECT_VALUE);
143 }else{
144 //the two annotations are coextensive but don't match
145 //we have a missmatch
146 choice = new PairingImpl(i, j, MISMATCH_VALUE);
147 }
148 }else if(keyAnn.overlaps(resAnn)){
149 //we have partial overlap -> PARTIALLY_CORRECT or WRONG
150 if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
151 choice = new PairingImpl(i, j, PARTIALLY_CORRECT_VALUE);
152 }else{
153 choice = new PairingImpl(i, j, WRONG_VALUE);
154 }
155 }
156
157 //add the new choice if any
158 if (choice != null) {
159 addPairing(choice, i, keyChoices);
160 addPairing(choice, j, responseChoices);
161 possibleChoices.add(choice);
162 }
163 }//for j
164 }//for i
165
166 //2) from all possible pairings, find the maximal set that also
167 //maximises the total score
168 Collections.sort(possibleChoices, new PairingScoreComparator());
169 Collections.reverse(possibleChoices);
170 finalChoices = new ArrayList();
171 correctMatches = 0;
172 partiallyCorrectMatches = 0;
173 missing = 0;
174 spurious = 0;
175
176 while(!possibleChoices.isEmpty()){
177 PairingImpl bestChoice = (PairingImpl)possibleChoices.remove(0);
178 bestChoice.consume();
179 finalChoices.add(bestChoice);
180 switch(bestChoice.value){
181 case CORRECT_VALUE:{
182 if(correctAnnotations == null) correctAnnotations = new HashSet();
183 correctAnnotations.add(bestChoice.getResponse());
184 correctMatches++;
185 bestChoice.setType(CORRECT_TYPE);
186 break;
187 }
188 case PARTIALLY_CORRECT_VALUE:{
189 if(partiallyCorrectAnnotations == null) partiallyCorrectAnnotations = new HashSet();
190 partiallyCorrectAnnotations.add(bestChoice.getResponse());
191 partiallyCorrectMatches++;
192 bestChoice.setType(PARTIALLY_CORRECT_TYPE);
193 break;
194 }
195 case MISMATCH_VALUE:{
196 //this is a mising and a spurious annotations together
197 if(missingAnnotations == null) missingAnnotations = new HashSet();
198 missingAnnotations.add(bestChoice.getKey());
199 missing ++;
200 if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
201 spuriousAnnotations.add(bestChoice.getResponse());
202 spurious ++;
203 bestChoice.setType(MISMATCH_TYPE);
204 break;
205 }
206 case WRONG_VALUE:{
207 if(bestChoice.getKey() != null){
208 //we have a missed key
209 if(missingAnnotations == null) missingAnnotations = new HashSet();
210 missingAnnotations.add(bestChoice.getKey());
211 missing ++;
212 bestChoice.setType(MISSING_TYPE);
213 }
214 if(bestChoice.getResponse() != null){
215 //we have a spurious response
216 if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
217 spuriousAnnotations.add(bestChoice.getResponse());
218 spurious ++;
219 bestChoice.setType(SPURIOUS_TYPE);
220 }
221 break;
222 }
223 default:{
224 throw new GateRuntimeException("Invalid pairing type: " +
225 bestChoice.value);
226 }
227 }
228 }
229 //add choices for the incorrect matches (MISSED, SPURIOUS)
230 //get the unmatched keys
231 for(int i = 0; i < keyChoices.size(); i++){
232 List aList = (List)keyChoices.get(i);
233 if(aList == null || aList.isEmpty()){
234 if(missingAnnotations == null) missingAnnotations = new HashSet();
235 missingAnnotations.add((Annotation)(keyList.get(i)));
236 PairingImpl choice = new PairingImpl(i, -1, WRONG_VALUE);
237 choice.setType(MISSING_TYPE);
238 finalChoices.add(choice);
239 missing ++;
240 }
241 }
242
243 //get the unmatched responses
244 for(int i = 0; i < responseChoices.size(); i++){
245 List aList = (List)responseChoices.get(i);
246 if(aList == null || aList.isEmpty()){
247 if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
248 spuriousAnnotations.add((Annotation)(responseList.get(i)));
249 PairingImpl choice = new PairingImpl(-1, i, WRONG_VALUE);
250 choice.setType(SPURIOUS_TYPE);
251 finalChoices.add(choice);
252 spurious ++;
253 }
254 }
255
256 return finalChoices;
257 }
258
259 /**
260 * Gets the strict precision (the ratio of correct responses out of all the
261 * provided responses).
262 * @return a <tt>double</tt> value.
263 */
264 public double getPrecisionStrict(){
265 if(responseList.size() == 0) {
266 return 1.0;
267 }
268 return correctMatches/(double)responseList.size();
269 }
270
271 /**
272 * Gets the strict recall (the ratio of key matched to a response out of all
273 * the keys).
274 * @return a <tt>double</tt> value.
275 */
276 public double getRecallStrict(){
277 if(keyList.size() == 0) {
278 return 1.0;
279 }
280 return correctMatches/(double)keyList.size();
281 }
282
283 /**
284 * Gets the lenient precision (where the partial matches are considered as
285 * correct).
286 * @return a <tt>double</tt> value.
287 */
288 public double getPrecisionLenient(){
289 if(responseList.size() == 0) {
290 return 1.0;
291 }
292 return ((double)correctMatches + partiallyCorrectMatches) / (double)responseList.size();
293 }
294
295 /**
296 * Gets the average of the strict and lenient precision values.
297 * @return a <tt>double</tt> value.
298 */
299 public double getPrecisionAverage() {
300 return ((double)getPrecisionLenient() + getPrecisionStrict()) / (double)(2.0);
301 }
302
303 /**
304 * Gets the lenient recall (where the partial matches are considered as
305 * correct).
306 * @return a <tt>double</tt> value.
307 */
308 public double getRecallLenient(){
309 if(keyList.size() == 0) {
310 return 1.0;
311 }
312 return ((double)correctMatches + partiallyCorrectMatches) / (double)keyList.size();
313 }
314
315 /**
316 * Gets the average of the strict and lenient recall values.
317 * @return a <tt>double</tt> value.
318 */
319 public double getRecallAverage() {
320 return ((double) getRecallLenient() + getRecallStrict()) / (double)(2.0);
321 }
322
323 /**
324 * Gets the strict F-Measure (the harmonic weighted mean of the strict
325 * precision and the strict recall) using the provided parameter as relative
326 * weight.
327 * @param beta The relative weight of precision and recall. A value of 1
328 * gives equal weights to precision and recall. A value of 0 takes the recall
329 * value completely out of the equation.
330 * @return a <tt>double</tt>value.
331 */
332 public double getFMeasureStrict(double beta){
333 double precision = getPrecisionStrict();
334 double recall = getRecallStrict();
335 double betaSq = beta * beta;
336 double answer = (double)(((double)(betaSq + 1) * precision * recall ) / (double)(betaSq * precision + recall));
337 if(Double.isNaN(answer)) answer = 0.0;
338 return answer;
339 }
340
341 /**
342 * Gets the lenient F-Measure (F-Measure where the lenient precision and
343 * recall values are used) using the provided parameter as relative weight.
344 * @param beta The relative weight of precision and recall. A value of 1
345 * gives equal weights to precision and recall. A value of 0 takes the recall
346 * value completely out of the equation.
347 * @return a <tt>double</tt>value.
348 */
349 public double getFMeasureLenient(double beta){
350 double precision = getPrecisionLenient();
351 double recall = getRecallLenient();
352 double betaSq = beta * beta;
353 double answer = (double)(((double)(betaSq + 1) * precision * recall) / ((double)betaSq * precision + recall));
354 if(Double.isNaN(answer)) answer = 0.0;
355 return answer;
356 }
357
358 /**
359 * Gets the average of strict and lenient F-Measure values.
360 * @param beta The relative weight of precision and recall. A value of 1
361 * gives equal weights to precision and recall. A value of 0 takes the recall
362 * value completely out of the equation.
363 * @return a <tt>double</tt>value.
364 */
365 public double getFMeasureAverage(double beta) {
366 double answer = ((double)getFMeasureLenient(beta) + (double)getFMeasureStrict(beta)) / (double)(2.0);
367 return answer;
368 }
369
370 /**
371 * Gets the number of correct matches.
372 * @return an <tt>int<tt> value.
373 */
374 public int getCorrectMatches(){
375 return correctMatches;
376 }
377
378 /**
379 * Gets the number of partially correct matches.
380 * @return an <tt>int<tt> value.
381 */
382 public int getPartiallyCorrectMatches(){
383 return partiallyCorrectMatches;
384 }
385
386 /**
387 * Gets the number of pairings of type {@link #MISSING_TYPE}.
388 * @return an <tt>int<tt> value.
389 */
390 public int getMissing(){
391 return missing;
392 }
393
394 /**
395 * Gets the number of pairings of type {@link #SPURIOUS_TYPE}.
396 * @return an <tt>int<tt> value.
397 */
398 public int getSpurious(){
399 return spurious;
400 }
401
402 /**
403 * Gets the number of pairings of type {@link #SPURIOUS_TYPE}.
404 * @return an <tt>int<tt> value.
405 */
406 public int getFalsePositivesStrict(){
407 return responseList.size() - correctMatches;
408 }
409
410 /**
411 * Gets the number of responses that aren't either correct or partially
412 * correct.
413 * @return an <tt>int<tt> value.
414 */
415 public int getFalsePositivesLenient(){
416 return responseList.size() - correctMatches - partiallyCorrectMatches;
417 }
418
419 /**
420 * Gets the number of keys provided.
421 * @return an <tt>int<tt> value.
422 */
423 public int getKeysCount() {
424 return keyList.size();
425 }
426
427 /**
428 * Gets the number of responses provided.
429 * @return an <tt>int<tt> value.
430 */
431 public int getResponsesCount() {
432 return responseList.size();
433 }
434
435 /**
436 * Prints to System.out the pairings that are not correct.
437 */
438 public void printMissmatches(){
439 //get the partial correct matches
440 Iterator iter = finalChoices.iterator();
441 while(iter.hasNext()){
442 PairingImpl aChoice = (PairingImpl)iter.next();
443 switch(aChoice.value){
444 case PARTIALLY_CORRECT_VALUE:{
445 System.out.println("Missmatch (partially correct):");
446 System.out.println("Key: " + keyList.get(aChoice.keyIndex).toString());
447 System.out.println("Response: " + responseList.get(aChoice.responseIndex).toString());
448 break;
449 }
450 }
451 }
452
453 //get the unmatched keys
454 for(int i = 0; i < keyChoices.size(); i++){
455 List aList = (List)keyChoices.get(i);
456 if(aList == null || aList.isEmpty()){
457 System.out.println("Missed Key: " + keyList.get(i).toString());
458 }
459 }
460
461 //get the unmatched responses
462 for(int i = 0; i < responseChoices.size(); i++){
463 List aList = (List)responseChoices.get(i);
464 if(aList == null || aList.isEmpty()){
465 System.out.println("Spurious Response: " + responseList.get(i).toString());
466 }
467 }
468 }
469
470
471
472 /**
473 * Performs some basic checks over the internal data structures from the last
474 * run.
475 * @throws Exception
476 */
477 void sanityCheck()throws Exception{
478 //all keys and responses should have at most one choice left
479 Iterator iter =keyChoices.iterator();
480 while(iter.hasNext()){
481 List choices = (List)iter.next();
482 if(choices != null){
483 if(choices.size() > 1){
484 throw new Exception("Multiple choices found!");
485 }else if(!choices.isEmpty()){
486 //size must be 1
487 PairingImpl aChoice = (PairingImpl)choices.get(0);
488 //the SAME choice should be found for the associated response
489 List otherChoices = (List)responseChoices.get(aChoice.responseIndex);
490 if(otherChoices == null ||
491 otherChoices.size() != 1 ||
492 otherChoices.get(0) != aChoice){
493 throw new Exception("Reciprocity error!");
494 }
495 }
496 }
497 }
498
499 iter =responseChoices.iterator();
500 while(iter.hasNext()){
501 List choices = (List)iter.next();
502 if(choices != null){
503 if(choices.size() > 1){
504 throw new Exception("Multiple choices found!");
505 }else if(!choices.isEmpty()){
506 //size must be 1
507 PairingImpl aChoice = (PairingImpl)choices.get(0);
508 //the SAME choice should be found for the associated response
509 List otherChoices = (List)keyChoices.get(aChoice.keyIndex);
510 if(otherChoices == null){
511 throw new Exception("Reciprocity error : null!");
512 }else if(otherChoices.size() != 1){
513 throw new Exception("Reciprocity error: not 1!");
514 }else if(otherChoices.get(0) != aChoice){
515 throw new Exception("Reciprocity error: different!");
516 }
517 }
518 }
519 }
520 }
521
522 /**
523 * Adds a new pairing to the internal data structures.
524 * @param pairing the pairing to be added
525 * @param index the index in the list of pairings
526 * @param listOfPairings the list of {@link Pairing}s where the
527 * pairing should be added
528 */
529 protected void addPairing(PairingImpl pairing, int index, List listOfPairings){
530 List existingChoices = (List)listOfPairings.get(index);
531 if(existingChoices == null){
532 existingChoices = new ArrayList();
533 listOfPairings.set(index, existingChoices);
534 }
535 existingChoices.add(pairing);
536 }
537
538 /**
539 * Gets the set of features considered significant for the matching algorithm.
540 * @return a Set.
541 */
542 public java.util.Set getSignificantFeaturesSet() {
543 return significantFeaturesSet;
544 }
545
546 /**
547 * Set the set of features considered significant for the matching algorithm.
548 * A <tt>null</tt> value means that all features are significant, an empty
549 * set value means that no features are significant while a set of String
550 * values specifies that only features with names included in the set are
551 * significant.
552 * @param significantFeaturesSet a Set of String values or <tt>null<tt>.
553 */
554 public void setSignificantFeaturesSet(java.util.Set significantFeaturesSet) {
555 this.significantFeaturesSet = significantFeaturesSet;
556 }
557
558 /**
559 * Represents a pairing of a key annotation with a response annotation and
560 * the associated score for that pairing.
561 */
562 public class PairingImpl implements Pairing{
563 PairingImpl(int keyIndex, int responseIndex, int value) {
564 this.keyIndex = keyIndex;
565 this.responseIndex = responseIndex;
566 this.value = value;
567 scoreCalculated = false;
568 }
569
570 public int getScore(){
571 if(scoreCalculated) return score;
572 else{
573 calculateScore();
574 return score;
575 }
576 }
577
578 public Annotation getKey(){
579 return keyIndex == -1 ? null : (Annotation)keyList.get(keyIndex);
580 }
581
582 public Annotation getResponse(){
583 return responseIndex == -1 ? null :
584 (Annotation)responseList.get(responseIndex);
585 }
586
587 public int getType(){
588 return type;
589 }
590
591
592 public void setType(int type) {
593 this.type = type;
594 }
595
596 /**
597 * Removes all mutually exclusive OTHER choices possible from
598 * the data structures.
599 * <tt>this</tt> gets removed from {@link #possibleChoices} as well.
600 */
601 public void consume(){
602 possibleChoices.remove(this);
603 List sameKeyChoices = (List)keyChoices.get(keyIndex);
604 sameKeyChoices.remove(this);
605 possibleChoices.removeAll(sameKeyChoices);
606
607 List sameResponseChoices = (List)responseChoices.get(responseIndex);
608 sameResponseChoices.remove(this);
609 possibleChoices.removeAll(sameResponseChoices);
610
611 Iterator iter = new ArrayList(sameKeyChoices).iterator();
612 while(iter.hasNext()){
613 ((PairingImpl)iter.next()).remove();
614 }
615 iter = new ArrayList(sameResponseChoices).iterator();
616 while(iter.hasNext()){
617 ((PairingImpl)iter.next()).remove();
618 }
619 sameKeyChoices.add(this);
620 sameResponseChoices.add(this);
621 }
622
623 /**
624 * Removes this choice from the two lists it belongs to
625 */
626 protected void remove(){
627 List fromKey = (List)keyChoices.get(keyIndex);
628 fromKey.remove(this);
629 List fromResponse = (List)responseChoices.get(responseIndex);
630 fromResponse.remove(this);
631 }
632
633 /**
634 * Calculates the score for this choice as:
635 * type - sum of all the types of all OTHER mutually exclusive choices
636 */
637 void calculateScore(){
638 //this needs to be a set so we don't count conflicts twice
639 Set conflictSet = new HashSet();
640 //add all the choices from the same response annotation
641 conflictSet.addAll((List)responseChoices.get(responseIndex));
642 //add all the choices from the same key annotation
643 conflictSet.addAll((List)keyChoices.get(keyIndex));
644 //remove this choice from the conflict set
645 conflictSet.remove(this);
646 score = value;
647 Iterator conflictIter = conflictSet.iterator();
648 while(conflictIter.hasNext()) score -= ((PairingImpl)conflictIter.next()).value;
649 scoreCalculated = true;
650 }
651
652 /**
653 * The index in the key collection of the key annotation for this pairing
654 */
655 int keyIndex;
656 /**
657 * The index in the response collection of the response annotation for this
658 * pairing
659 */
660 int responseIndex;
661
662 /**
663 * The type of this pairing.
664 */
665 int type;
666
667 /**
668 * The value for this pairing. This value depends only on this pairing, not
669 * on the conflict set.
670 */
671 int value;
672
673 /**
674 * The score of this pairing (calculated based on value and conflict set).
675 */
676 int score;
677 boolean scoreCalculated;
678 }
679
680 /**
681 * Compares two pairings:
682 * the better score is preferred;
683 * for the same score the better type is preferred (exact matches are
684 * preffered to partial ones).
685 */
686 protected static class PairingScoreComparator implements Comparator{
687 /**
688 * Compares two choices:
689 * the better score is preferred;
690 * for the same score the better type is preferred (exact matches are
691 * preffered to partial ones).
692 * @return a positive value if the first pairing is better than the second,
693 * zero if they score the same or negative otherwise.
694 */
695
696 public int compare(Object o1, Object o2){
697 PairingImpl first = (PairingImpl)o1;
698 PairingImpl second = (PairingImpl)o2;
699 //compare by score
700 int res = first.getScore() - second.getScore();
701 //compare by type
702 if(res == 0) res = first.getType() - second.getType();
703 //compare by completeness (a wrong match with both key and response
704 //is "better" than one with only key or response
705 if(res == 0){
706 res = (first.getKey() == null ? 0 : 1) +
707 (first.getResponse() == null ? 0 : 1) +
708 (second.getKey() == null ? 0 : -1) +
709 (second.getResponse() == null ? 0 : -1);
710 }
711 return res;
712 }
713 }
714
715 /**
716 * Compares two choices based on start offset of key (or response
717 * if key not present) and type if offsets are equal.
718 */
719 public static class PairingOffsetComparator implements Comparator{
720 /**
721 * Compares two choices based on start offset of key (or response
722 * if key not present) and type if offsets are equal.
723 */
724 public int compare(Object o1, Object o2){
725 Pairing first = (Pairing)o1;
726 Pairing second = (Pairing)o2;
727 Annotation key1 = first.getKey();
728 Annotation key2 = second.getKey();
729 Annotation res1 = first.getResponse();
730 Annotation res2 = second.getResponse();
731 Long start1 = key1 == null ? null : key1.getStartNode().getOffset();
732 if(start1 == null) start1 = res1.getStartNode().getOffset();
733 Long start2 = key2 == null ? null : key2.getStartNode().getOffset();
734 if(start2 == null) start2 = res2.getStartNode().getOffset();
735 int res = start1.compareTo(start2);
736 if(res == 0){
737 //compare by type
738 res = second.getType() - first.getType();
739 }
740
741 //
742 //
743 //
744 // //choices with keys are smaller than ones without
745 // if(key1 == null && key2 != null) return 1;
746 // if(key1 != null && key2 == null) return -1;
747 // if(key1 == null){
748 // //both keys are null
749 // res = res1.getStartNode().getOffset().
750 // compareTo(res2.getStartNode().getOffset());
751 // if(res == 0) res = res1.getEndNode().getOffset().
752 // compareTo(res2.getEndNode().getOffset());
753 // if(res == 0) res = second.getType() - first.getType();
754 // }else{
755 // //both keys are present
756 // res = key1.getStartNode().getOffset().compareTo(
757 // key2.getStartNode().getOffset());
758 //
759 // if(res == 0){
760 // //choices with responses are smaller than ones without
761 // if(res1 == null && res2 != null) return 1;
762 // if(res1 != null && res2 == null) return -1;
763 // if(res1 != null){
764 // res = res1.getStartNode().getOffset().
765 // compareTo(res2.getStartNode().getOffset());
766 // }
767 // if(res == 0)res = key1.getEndNode().getOffset().compareTo(
768 // key2.getEndNode().getOffset());
769 // if(res == 0 && res1 != null){
770 // res = res1.getEndNode().getOffset().
771 // compareTo(res2.getEndNode().getOffset());
772 // }
773 // if(res == 0) res = second.getType() - first.getType();
774 // }
775 // }
776 return res;
777 }
778
779 }
780
781 /**
782 * A method that returns specific type of annotations
783 * @param type
784 * @return a {@link Set} of {@link Annotation}s.
785 */
786 public Set<Annotation> getAnnotationsOfType(int type) {
787 switch(type) {
788 case CORRECT_TYPE:
789 return (correctAnnotations == null)? new HashSet() : correctAnnotations;
790 case PARTIALLY_CORRECT_TYPE:
791 return (partiallyCorrectAnnotations == null) ? new HashSet() : partiallyCorrectAnnotations;
792 case SPURIOUS_TYPE:
793 return (spuriousAnnotations == null) ? new HashSet() : spuriousAnnotations;
794 case MISSING_TYPE:
795 return (missingAnnotations == null) ? new HashSet() : missingAnnotations;
796 default:
797 return new HashSet();
798 }
799 }
800
801 /**
802 * @return annotation type for all the annotations
803 */
804 public String getAnnotationType() {
805 if (!keyList.isEmpty()) {
806 return ((Annotation) keyList.iterator().next()).getType();
807 } else if (!responseList.isEmpty()) {
808 return ((Annotation) responseList.iterator().next()).getType();
809 } else {
810 return "";
811 }
812 }
813
814 public List<String> getMeasuresRow(Object[] measures, String title) {
815 NumberFormat f = NumberFormat.getInstance(Locale.ENGLISH);
816 f.setMaximumFractionDigits(2);
817 f.setMinimumFractionDigits(2);
818 List<String> row = new ArrayList<String>();
819 row.add(title);
820 row.add(Integer.toString(getCorrectMatches()));
821 row.add(Integer.toString(getMissing()));
822 row.add(Integer.toString(getSpurious()));
823 row.add(Integer.toString(getPartiallyCorrectMatches()));
824 for (Object object : measures) {
825 String measure = (String) object;
826 double beta = Double.valueOf(
827 measure.substring(1,measure.indexOf('-')));
828 if (measure.endsWith("strict")) {
829 row.add(f.format(getRecallStrict()));
830 row.add(f.format(getPrecisionStrict()));
831 row.add(f.format(getFMeasureStrict(beta)));
832 } else if (measure.endsWith("lenient")) {
833 row.add(f.format(getRecallLenient()));
834 row.add(f.format(getPrecisionLenient()));
835 row.add(f.format(getFMeasureLenient(beta)));
836 } else if (measure.endsWith("average")) {
837 row.add(f.format(getRecallAverage()));
838 row.add(f.format(getPrecisionAverage()));
839 row.add(f.format(getFMeasureAverage(beta)));
840 }
841 }
842 return row;
843 }
844
845 public HashSet correctAnnotations, partiallyCorrectAnnotations,
846 missingAnnotations, spuriousAnnotations;
847
848
849 /** Type for correct pairings (when the key and response match completely)*/
850 public static final int CORRECT_TYPE = 0;
851
852 /**
853 * Type for partially correct pairings (when the key and response match
854 * in type and significant features but the spans are just overlapping and
855 * not identical.
856 */
857 public static final int PARTIALLY_CORRECT_TYPE = 1;
858
859 /**
860 * Type for missing pairings (where the key was not matched to a response).
861 */
862 public static final int MISSING_TYPE = 2;
863
864 /**
865 * Type for spurious pairings (where the response is not matching any key).
866 */
867 public static final int SPURIOUS_TYPE = 3;
868
869 /**
870 * Type for mismatched pairings (where the key and response are co-extensive
871 * but they don't match).
872 */
873 public static final int MISMATCH_TYPE = 4;
874
875 /**
876 * Score for a correct pairing.
877 */
878 private static final int CORRECT_VALUE = 3;
879
880 /**
881 * Score for a partially correct pairing.
882 */
883 private static final int PARTIALLY_CORRECT_VALUE = 2;
884
885
886 /**
887 * Score for a mismatched pairing (higher then for WRONG as at least the
888 * offsets were right).
889 */
890 private static final int MISMATCH_VALUE = 1;
891
892 /**
893 * Score for a wrong (missing or spurious) pairing.
894 */
895 private static final int WRONG_VALUE = 0;
896
897 /**
898 * The set of significant features used for matching.
899 */
900 private java.util.Set significantFeaturesSet;
901
902 /**
903 * The number of correct matches.
904 */
905 protected int correctMatches;
906
907 /**
908 * The number of partially correct matches.
909 */
910 protected int partiallyCorrectMatches;
911
912 /**
913 * The number of missing matches.
914 */
915 protected int missing;
916
917 /**
918 * The number of spurious matches.
919 */
920 protected int spurious;
921
922 /**
923 * A list with all the key annotations
924 */
925 protected List keyList;
926
927 /**
928 * A list with all the response annotations
929 */
930 protected List responseList;
931
932 /**
933 * A list of lists representing all possible choices for each key
934 */
935 protected List keyChoices;
936
937 /**
938 * A list of lists representing all possible choices for each response
939 */
940 protected List responseChoices;
941
942 /**
943 * All the posible choices are added to this list for easy iteration.
944 */
945 protected List possibleChoices;
946
947 /**
948 * A list with the choices selected for the best result.
949 */
950 protected List finalChoices;
951
952 }
|