PronominalCoref.java
0001 /*
0002  *  PronominalCoref.java
0003  *
0004  *  Copyright (c) 1995-2010, The University of Sheffield. See the file
0005  *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
0006  *
0007  *  This file is part of GATE (see http://gate.ac.uk/), and is free
0008  *  software, licenced under the GNU Library General Public License,
0009  *  Version 2, June 1991 (in the distribution as file licence.html,
0010  *  and also available at http://gate.ac.uk/gate/licence.html).
0011  *
0012  *  Marin Dimitrov, 30/Dec/2001
0013  *
0014  *  $Id: PronominalCoref.java 12006 2009-12-01 17:24:28Z thomas_heitz $
0015  */
0016 
0017 package gate.creole.coref;
0018 
0019 import java.net.MalformedURLException;
0020 import java.net.URL;
0021 import java.util.*;
0022 
0023 import junit.framework.Assert;
0024 
0025 import gate.*;
0026 import gate.annotation.AnnotationSetImpl;
0027 import gate.creole.*;
0028 import gate.util.*;
0029 
0030 public class PronominalCoref extends AbstractLanguageAnalyser
0031                               implements ProcessingResource, ANNIEConstants,
0032                               Benchmarkable {
0033 
0034   public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
0035 
0036   public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";
0037 
0038   /** --- */
0039   private static final boolean DEBUG = false;
0040 
0041   //JAPE grammars
0042   private static final String QT_GRAMMAR_URL = Files.getGateResource(
0043               "/creole/coref/quoted_text.jape").toString();
0044   private static final String PLEON_GRAMMAR_URL = Files.getGateResource(
0045           "/creole/coref/pleonasm.jape").toString();
0046 
0047   //annotation types
0048   private static final String QUOTED_TEXT_TYPE = "QuotedText";
0049   private static final String PLEONASTIC_TYPE = "PleonasticIt";
0050 
0051   //annotation features
0052   private static final String PRP_CATEGORY = "PRP";
0053   private static final String PRP$_CATEGORY = "PRP$";
0054 
0055   //scope
0056   private static final int SENTENCES_IN_SCOPE = 3;
0057   /** --- */
0058   private static AnnotationOffsetComparator ANNOTATION_OFFSET_COMPARATOR;
0059   /** --- */
0060   private String annotationSetName;
0061   /** --- */
0062   private Transducer qtTransducer;
0063   /** --- */
0064   private Transducer pleonTransducer;
0065   /** --- */
0066   private AnnotationSet defaultAnnotations;
0067   /** --- */
0068   private Sentence[] textSentences;
0069   /** --- */
0070   private Quote[] quotedText;
0071   /** --- */
0072   private Annotation[] pleonasticIt;
0073   /** --- */
0074   private HashMap personGender;
0075   /** --- */
0076   private HashMap anaphor2antecedent;
0077   /** --- */
0078   private static final FeatureMap PRP_RESTRICTION;
0079 
0080   private boolean resolveIt = true;
0081   
0082   /** default ORGANIZATIONS,LOCATION**/
0083   private Set<String> inanimatedSet;
0084   
0085   private String inanimatedEntityTypes;
0086   
0087   private String benchmarkId;
0088 
0089   /** --- */
0090   static {
0091     ANNOTATION_OFFSET_COMPARATOR = new AnnotationOffsetComparator();
0092     PRP_RESTRICTION = new SimpleFeatureMapImpl();
0093     PRP_RESTRICTION.put(TOKEN_CATEGORY_FEATURE_NAME,PRP_CATEGORY);
0094   }
0095 
0096   /** --- */
0097   public PronominalCoref() {
0098 
0099     this.personGender = new HashMap();
0100     this.anaphor2antecedent = new HashMap();
0101     this.qtTransducer = new gate.creole.Transducer();
0102     this.pleonTransducer = new gate.creole.Transducer();
0103     this.inanimatedSet = new HashSet();
0104   }
0105 
0106   /** Initialise this resource, and return it. */
0107   public Resource init() throws ResourceInstantiationException {
0108 
0109     //0. preconditions
0110     Assert.assertNotNull(this.qtTransducer);
0111 
0112     //1. initialise quoted text transducer
0113     URL qtGrammarURL = null;
0114     try {
0115       qtGrammarURL = new URL(QT_GRAMMAR_URL);
0116     }
0117     catch(MalformedURLException mue) {
0118       throw new ResourceInstantiationException(mue);
0119     }
0120     this.qtTransducer.setGrammarURL(qtGrammarURL);
0121     this.qtTransducer.setEncoding("UTF-8");
0122     this.qtTransducer.init();
0123 
0124     //2. initialise pleonastic transducer
0125     URL pleonGrammarURL = null;
0126     try {
0127       pleonGrammarURL = new URL(PLEON_GRAMMAR_URL);
0128     }
0129     catch(MalformedURLException mue) {
0130       throw new ResourceInstantiationException(mue);
0131     }
0132     this.pleonTransducer.setGrammarURL(pleonGrammarURL);
0133     this.pleonTransducer.setEncoding("UTF-8");
0134     this.pleonTransducer.init();
0135 
0136     //3. delegate
0137     return super.init();
0138   // init()
0139 
0140   /**
0141    * Reinitialises the processing resource. After calling this method the
0142    * resource should be in the state it is after calling init.
0143    * If the resource depends on external resources (such as rules files) then
0144    * the resource will re-read those resources. If the data used to create
0145    * the resource has changed since the resource has been created then the
0146    * resource will change too after calling reInit().
0147   */
0148   public void reInit() throws ResourceInstantiationException {
0149 
0150     if (null != this.qtTransducer) {
0151       this.qtTransducer.reInit();
0152     }
0153 
0154     if (null != this.pleonTransducer) {
0155       this.pleonTransducer.reInit();
0156     }
0157 
0158     init();
0159   // reInit()
0160 
0161 
0162   /** Set the document to run on. */
0163   public void setDocument(Document newDocument) {
0164 
0165     //0. precondition
0166 //    Assert.assertNotNull(newDocument);
0167 
0168     //1. set doc for aggregated components
0169     this.qtTransducer.setDocument(newDocument);
0170     this.pleonTransducer.setDocument(newDocument);
0171 
0172     //3. delegate
0173     super.setDocument(newDocument);
0174   }
0175 
0176   /** --- */
0177   public void setAnnotationSetName(String annotationSetName) {
0178     this.annotationSetName = annotationSetName;
0179   }
0180 
0181 
0182   /** --- */
0183   public String getAnnotationSetName() {
0184     return annotationSetName;
0185   }
0186 
0187   /** --- */
0188   public void setResolveIt(Boolean newValue) {
0189     this.resolveIt = newValue.booleanValue();
0190   }
0191 
0192   /** --- */
0193   public Boolean getResolveIt() {
0194     return new Boolean(this.resolveIt);
0195   }
0196 
0197 
0198   /**
0199    * This method runs the coreferencer. It assumes that all the needed parameters
0200    * are set. If they are not, an exception will be fired.
0201    */
0202   public void execute() throws ExecutionException{
0203 
0204     //0. preconditions
0205     if(null == this.document) {
0206       throw new ExecutionException("[coreference] Document is not set!");
0207     }
0208 
0209     //1. preprocess
0210     preprocess();
0211 /*
0212     //2. remove corefs from previous run
0213     String annSetName = this.annotationSetName == null ? "COREF"
0214                                                        : this.annotationSetName;
0215 
0216     AnnotationSet corefSet = this.document.getAnnotations(annSetName);
0217     if (false == corefSet.isEmpty()) {
0218       corefSet.clear();
0219     }
0220 */
0221     //3.get personal pronouns
0222     FeatureMap constraintPRP = new SimpleFeatureMapImpl();
0223     constraintPRP.put(TOKEN_CATEGORY_FEATURE_NAME,PRP_CATEGORY);
0224     AnnotationSet personalPronouns = this.defaultAnnotations.get(TOKEN_ANNOTATION_TYPE,constraintPRP);
0225 
0226     //4.get possesive pronouns
0227     FeatureMap constraintPRP$ = new SimpleFeatureMapImpl();
0228     constraintPRP$.put(TOKEN_CATEGORY_FEATURE_NAME,PRP$_CATEGORY);
0229     AnnotationSet possesivePronouns = this.defaultAnnotations.get(TOKEN_ANNOTATION_TYPE,constraintPRP$);
0230 
0231     //5.combine them
0232     List pronouns = new ArrayList();
0233     if (personalPronouns != null && !personalPronouns.isEmpty()) {
0234       pronouns.addAll(personalPronouns);
0235     }
0236 
0237     if (possesivePronouns != null && !possesivePronouns.isEmpty()) {
0238       pronouns.addAll(possesivePronouns);
0239     }
0240 
0241     //6.do we have pronouns at all?
0242     if (pronouns.isEmpty()) {
0243       //do nothing
0244       return;
0245     }
0246 
0247     //7.sort them according to offset
0248     Object[] arrPronouns = pronouns.toArray();
0249     java.util.Arrays.sort(arrPronouns,ANNOTATION_OFFSET_COMPARATOR);
0250 
0251     //8.cleanup - ease the GC
0252     pronouns = null;
0253     personalPronouns = null;
0254     possesivePronouns = null;
0255 
0256     int prnSentIndex = 0;
0257 
0258 
0259     //10. process all pronouns
0260     for (int i=0; i< arrPronouns.length; i++) {
0261       Annotation currPronoun = (Annotation)arrPronouns[i];
0262       while (this.textSentences[prnSentIndex].getEndOffset().longValue() <
0263                                       currPronoun.getEndNode().getOffset().longValue()) {
0264         prnSentIndex++;
0265       }
0266 
0267       Sentence currSentence = this.textSentences[prnSentIndex];
0268       Assert.assertTrue(currSentence.getStartOffset().longValue() <= currPronoun.getStartNode().getOffset().longValue());
0269       Assert.assertTrue(currSentence.getEndOffset().longValue() >= currPronoun.getEndNode().getOffset().longValue());
0270 
0271       //11. find antecedent (if any) for pronoun
0272       Annotation antc = findAntecedent(currPronoun,prnSentIndex);
0273 
0274       //12. add to the ana2ant hashtable
0275       this.anaphor2antecedent.put(currPronoun,antc);
0276     }
0277 
0278     //done
0279   }
0280 
0281 
0282   /** --- */
0283   public HashMap getResolvedAnaphora() {
0284     return this.anaphor2antecedent;
0285   }
0286 
0287   /** --- */
0288   private Annotation findAntecedent(Annotation currPronoun,int prnSentIndex) {
0289 
0290     //0. preconditions
0291     Assert.assertNotNull(currPronoun);
0292     Assert.assertTrue(prnSentIndex >= 0);
0293     Assert.assertTrue(currPronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0294     Assert.assertTrue(currPronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY||
0295                       currPronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0296 
0297     //1.
0298     String strPronoun = (String)currPronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0299 
0300     Assert.assertNotNull(strPronoun);
0301 
0302     //2. delegate processing to the appropriate methods
0303     if (strPronoun.equalsIgnoreCase("HE"||
0304         strPronoun.equalsIgnoreCase("HIM"||
0305         strPronoun.equalsIgnoreCase("HIS"||
0306         strPronoun.equalsIgnoreCase("HIMSELF")) {
0307       return _resolve$HE$HIM$HIS$HIMSELF$(currPronoun,prnSentIndex);
0308     }
0309     else if (strPronoun.equalsIgnoreCase("SHE"||
0310               strPronoun.equalsIgnoreCase("HER")) {
0311       return _resolve$SHE$HER$(currPronoun,prnSentIndex);
0312     }
0313     else if (strPronoun.equalsIgnoreCase("IT"||
0314               strPronoun.equalsIgnoreCase("ITS"||
0315               strPronoun.equalsIgnoreCase("ITSELF")) {
0316       return _resolve$IT$ITS$ITSELF$(currPronoun,prnSentIndex);
0317     }
0318     else if (strPronoun.equalsIgnoreCase("I"||
0319               strPronoun.equalsIgnoreCase("ME"||
0320               strPronoun.equalsIgnoreCase("MY"||
0321               strPronoun.equalsIgnoreCase("MYSELF")) {
0322       return _resolve$I$ME$MY$MYSELF$(currPronoun,prnSentIndex);
0323     }
0324     else {
0325       if (DEBUG) {
0326         gate.util.Err.println("["+strPronoun+"] is not handled yet...");
0327       }
0328       return null;
0329     }
0330   }
0331 
0332 
0333   boolean isPleonastic(Annotation pronoun) {
0334 
0335     //0. preconditions
0336     Assert.assertNotNull(pronoun);
0337     String str = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0338     Assert.assertTrue(str.equalsIgnoreCase("IT"));
0339 
0340     //1. do we have pleonasms in this text?
0341     if (this.pleonasticIt.length == 0) {
0342       return false;
0343     }
0344 
0345     //2. find closest pleonasm index
0346     int closestPleonasmIndex = java.util.Arrays.binarySearch(this.pleonasticIt,
0347                                                              pronoun,
0348                                                              ANNOTATION_OFFSET_COMPARATOR);
0349     //normalize index
0350     if (closestPleonasmIndex < 0) {
0351       closestPleonasmIndex = -closestPleonasmIndex --1;
0352     }
0353 
0354     //still not good?
0355     if (closestPleonasmIndex < 0) {
0356       closestPleonasmIndex = 0;
0357     }
0358 
0359     //get closest pleonasm
0360     Annotation pleonasm = this.pleonasticIt[closestPleonasmIndex];
0361 
0362 //System.out.println(pleonasm);
0363 //System.out.println(pronoun);
0364 
0365     //3. return true only if the proboun is contained in pleonastic fragment
0366     boolean result =  (pleonasm.getStartNode().getOffset().intValue() <= pronoun.getStartNode().getOffset().intValue()
0367             &&
0368             pleonasm.getEndNode().getOffset().intValue() >= pronoun.getEndNode().getOffset().intValue());
0369 //System.out.println("is pleon=["+result+"]");
0370     return result;
0371   }
0372 
0373 
0374   /** --- */
0375   private Annotation _resolve$HE$HIM$HIS$HIMSELF$(Annotation pronoun, int sentenceIndex) {
0376 
0377     //0. preconditions
0378     Assert.assertTrue(pronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0379     Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY||
0380                       pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0381     String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0382     Assert.assertTrue(pronounString.equalsIgnoreCase("HE"||
0383                       pronounString.equalsIgnoreCase("HIM"||
0384                       pronounString.equalsIgnoreCase("HIS"||
0385                       pronounString.equalsIgnoreCase("HIMSELF"));
0386 
0387     //1.
0388     boolean antecedentFound = false;
0389     int scopeFirstIndex = sentenceIndex - SENTENCES_IN_SCOPE;
0390     if (scopeFirstIndex < scopeFirstIndex = 0;
0391 
0392     int currSentenceIndex = sentenceIndex;
0393     Annotation bestAntecedent = null;
0394 
0395     while (currSentenceIndex >= scopeFirstIndex || antecedentFound == false) {
0396       Sentence currSentence = this.textSentences[currSentenceIndex];
0397       AnnotationSet persons = currSentence.getPersons();
0398 
0399       Iterator it = persons.iterator();
0400       while (it.hasNext()) {
0401         Annotation currPerson = (Annotation)it.next();
0402         String gender = (String)this.personGender.get(currPerson);
0403 
0404         if (null == gender ||
0405             gender.equalsIgnoreCase("MALE"||
0406             gender.equalsIgnoreCase("UNKNOWN")) {
0407           //hit
0408           antecedentFound = true;
0409 
0410           if (null == bestAntecedent) {
0411             bestAntecedent = currPerson;
0412           }
0413           else {
0414             bestAntecedent = _chooseAntecedent$HE$HIM$HIS$SHE$HER$HIMSELF$(bestAntecedent,currPerson,pronoun);
0415           }
0416         }
0417       }
0418 
0419       if (== currSentenceIndex--)
0420         break;
0421 
0422     }
0423 
0424     return bestAntecedent;
0425   }
0426 
0427 
0428   /** --- */
0429   private Annotation _resolve$SHE$HER$(Annotation pronoun, int sentenceIndex) {
0430 
0431     //0. preconditions
0432     Assert.assertTrue(pronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0433     Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY||
0434                       pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0435     String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0436     Assert.assertTrue(pronounString.equalsIgnoreCase("SHE"||
0437                       pronounString.equalsIgnoreCase("HER"));
0438 
0439     //1.
0440     boolean antecedentFound = false;
0441     int scopeFirstIndex = sentenceIndex - SENTENCES_IN_SCOPE;
0442     if (scopeFirstIndex < scopeFirstIndex = 0;
0443     int currSentenceIndex = sentenceIndex;
0444     Annotation bestAntecedent = null;
0445 
0446     while (currSentenceIndex >= scopeFirstIndex || antecedentFound == false) {
0447       Sentence currSentence = this.textSentences[currSentenceIndex];
0448       AnnotationSet persons = currSentence.getPersons();
0449 
0450       Iterator it = persons.iterator();
0451       while (it.hasNext()) {
0452         Annotation currPerson = (Annotation)it.next();
0453         String gender = (String)this.personGender.get(currPerson);
0454 
0455         if (null == gender ||
0456             gender.equalsIgnoreCase("FEMALE"||
0457             gender.equalsIgnoreCase("UNKNOWN")) {
0458           //hit
0459           antecedentFound = true;
0460 
0461           if (null == bestAntecedent) {
0462             bestAntecedent = currPerson;
0463           }
0464           else {
0465             bestAntecedent = _chooseAntecedent$HE$HIM$HIS$SHE$HER$HIMSELF$(bestAntecedent,currPerson,pronoun);
0466           }
0467         }
0468       }
0469 
0470       if (== currSentenceIndex--)
0471         break;
0472     }
0473 
0474     return bestAntecedent;
0475   }
0476 
0477 
0478   /** --- */
0479   private Annotation _resolve$IT$ITS$ITSELF$(Annotation pronoun, int sentenceIndex) {
0480     //do not resolve it pronouns if disabled by the user
0481     if (! resolveIt)
0482       return null;
0483 
0484     //0. preconditions
0485     Assert.assertTrue(pronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0486     Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY||
0487                       pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0488     String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0489     Assert.assertTrue(pronounString.equalsIgnoreCase("IT"||
0490                       pronounString.equalsIgnoreCase("ITS"||
0491                       pronounString.equalsIgnoreCase("ITSELF"));
0492 
0493     //0.5 check if the IT is pleonastic
0494     if (pronounString.equalsIgnoreCase("IT"&&
0495         isPleonastic(pronoun)) {
0496 //System.out.println("PLEONASM...");
0497       return null;
0498     }
0499 
0500     //1.
0501     int scopeFirstIndex = sentenceIndex - 1;
0502     if (scopeFirstIndex < scopeFirstIndex = 0;
0503 
0504     int currSentenceIndex = sentenceIndex;
0505     Annotation bestAntecedent = null;
0506 
0507     while (currSentenceIndex >= scopeFirstIndex) {
0508 
0509       Sentence currSentence = this.textSentences[currSentenceIndex];
0510       Set<Annotation> org_loc = currSentence.getInanimated();
0511 
0512       Iterator it = org_loc.iterator();
0513       while (it.hasNext()) {
0514         Annotation currOrgLoc = (Annotation)it.next();
0515 
0516         if (null == bestAntecedent) {
0517           //discard cataphoric references
0518           if (currOrgLoc.getStartNode().getOffset().longValue() <
0519                                           pronoun.getStartNode().getOffset().longValue()) {
0520             bestAntecedent = currOrgLoc;
0521           }
0522         }
0523         else {
0524           bestAntecedent = this._chooseAntecedent$IT$ITS$ITSELF$(bestAntecedent,currOrgLoc,pronoun);
0525         }
0526       }
0527 
0528       if (== currSentenceIndex--)
0529         break;
0530     }
0531 
0532     return bestAntecedent;
0533   }
0534 
0535 
0536   /** --- */
0537   private Annotation _resolve$I$ME$MY$MYSELF$(Annotation pronoun, int sentenceIndex) {
0538 
0539     //0. preconditions
0540     Assert.assertTrue(pronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0541     Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY||
0542                       pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0543     String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0544     Assert.assertTrue(pronounString.equalsIgnoreCase("I"||
0545                       pronounString.equalsIgnoreCase("MY"||
0546                       pronounString.equalsIgnoreCase("ME"||
0547                       pronounString.equalsIgnoreCase("MYSELF"));
0548 
0549     //0.5 sanity check
0550     //if there are not quotes at all in the text then exit
0551     if (== this.quotedText.length) {
0552 //System.out.println("TEXT WITH NO QUOTES ENCOUNTERED...");
0553       return null;
0554     }
0555 
0556 
0557     //1.
0558     Annotation bestAntecedent = null;
0559 
0560     int closestQuoteIndex = java.util.Arrays.binarySearch(this.quotedText,pronoun,ANNOTATION_OFFSET_COMPARATOR);
0561     //normalize index
0562     if (closestQuoteIndex < 0) {
0563       closestQuoteIndex = -closestQuoteIndex --1;
0564     }
0565 
0566     //still not good?
0567     if (closestQuoteIndex < 0) {
0568       closestQuoteIndex = 0;
0569     }
0570 
0571     //get closest Quote
0572     Quote quoteContext = this.quotedText[closestQuoteIndex];
0573 
0574     //assure that the pronoun is contained in the quoted text fragment
0575     //otherwise exit
0576 
0577     if (pronoun.getStartNode().getOffset().intValue() > quoteContext.getEndOffset().intValue() ||
0578         pronoun.getEndNode().getOffset().intValue() < quoteContext.getStartOffset().intValue()) {
0579       //oops, probably incorrect text - I/My/Me is not part of quoted text fragment
0580       //exit
0581 //System.out.println("Oops! ["+pronounString+"] not part of quoted fragment...");
0582       return null;
0583     }
0584 
0585     //get the Persons that precede/succeed the quoted fragment
0586     //the order is:
0587     //
0588     //[1]. if there exists a Person or pronoun in {he, she} following the quoted fragment but
0589     //in the same sentence, then use it
0590     //i.e.  ["PRN1(x)...", said X ...A, B, C ....]
0591     //
0592     //[2]. if there is a Person (NOT a pronoun) in the same sentence,
0593     // preceding the quote, then use it
0594     //i.e. . [A, B, C...X ..."PRN1(x) ..."...]
0595     //
0596 
0597     //try [1]
0598     //get the succeeding Persons/pronouns
0599     Set<Annotation> succCandidates = quoteContext.getAntecedentCandidates(Quote.ANTEC_AFTER);
0600     if (false == succCandidates.isEmpty()) {
0601       //cool, we have candidates, pick up the one closest to the end quote
0602       Iterator it = succCandidates.iterator();
0603 
0604       while (it.hasNext()) {
0605         Annotation currCandidate = (Annotation)it.next();
0606         if (null == bestAntecedent || ANNOTATION_OFFSET_COMPARATOR.compare(bestAntecedent,currCandidate0) {
0607           //wow, we have a candidate that is closer to the quote
0608           bestAntecedent = currCandidate;
0609         }
0610       }
0611     }
0612 
0613     //try [2]
0614     //get the preceding Persons/pronouns
0615     if (null == bestAntecedent) {
0616       Set<Annotation> precCandidates = quoteContext.getAntecedentCandidates(Quote.ANTEC_BEFORE);
0617       if (false == precCandidates.isEmpty()) {
0618         //cool, we have candidates, pick up the one closest to the end quote
0619         Iterator it = precCandidates.iterator();
0620 
0621         while (it.hasNext()) {
0622           Annotation currCandidate = (Annotation)it.next();
0623           if (null == bestAntecedent || ANNOTATION_OFFSET_COMPARATOR.compare(bestAntecedent,currCandidate0) {
0624             //wow, we have a candidate that is closer to the quote
0625             bestAntecedent = currCandidate;
0626           }
0627         }
0628       }
0629     }
0630 
0631     //try [3]
0632     //get the Persons/pronouns back in context
0633     if (null == bestAntecedent) {
0634       Set<Annotation> precCandidates = quoteContext.getAntecedentCandidates(Quote.ANTEC_BACK);
0635       if (false == precCandidates.isEmpty()) {
0636         //cool, we have candidates, pick up the one closest to the end quote
0637         Iterator it = precCandidates.iterator();
0638 
0639         while (it.hasNext()) {
0640           Annotation currCandidate = (Annotation)it.next();
0641           if (null == bestAntecedent || ANNOTATION_OFFSET_COMPARATOR.compare(bestAntecedent,currCandidate0) {
0642             //wow, we have a candidate that is closer to the quote
0643             bestAntecedent = currCandidate;
0644           }
0645         }
0646       }
0647     }
0648 
0649     return bestAntecedent;
0650   }
0651 
0652 
0653   /** --- */
0654   private void preprocess() throws ExecutionException {
0655 
0656     //0.5 cleanup
0657     this.personGender.clear();
0658     this.anaphor2antecedent.clear();
0659 
0660     //1.get all annotation in the input set
0661     if this.annotationSetName == null || this.annotationSetName.equals("")) {
0662       this.defaultAnnotations = this.document.getAnnotations();
0663     }
0664     else {
0665       this.defaultAnnotations = this.document.getAnnotations(annotationSetName);
0666     }
0667 
0668     //if none found, print warning and exit
0669     if (this.defaultAnnotations == null || this.defaultAnnotations.isEmpty()) {
0670       Err.prln("Coref Warning: No annotations found for processing!");
0671       return;
0672     }
0673 
0674     // get the list of inanimated entity types 
0675     if (inanimatedEntityTypes==null||inanimatedEntityTypes.equals(""))
0676       inanimatedEntityTypes="Organization;Location";
0677     
0678     String[] types = inanimatedEntityTypes.split(";");
0679     this.inanimatedSet.addAll(Arrays.asList(types));
0680         
0681     //2.1 remove QT annotations if left from previous execution
0682     AnnotationSet qtSet = this.defaultAnnotations.get(QUOTED_TEXT_TYPE);
0683     if (qtSet != null && !qtSet.isEmpty()) {
0684       this.defaultAnnotations.removeAll(qtSet);
0685     }
0686 
0687     //2.2. run quoted text transducer to generate "Quoted Text" annotations
0688     Benchmark.executeWithBenchmarking(this.qtTransducer,
0689             Benchmark.createBenchmarkId("qtTransducer",
0690                     getBenchmarkId()), this, null);
0691 
0692     //3.1 remove pleonastic annotations if left from previous execution
0693     AnnotationSet pleonSet = this.defaultAnnotations.get(PLEONASTIC_TYPE);
0694     if (pleonSet != null && !pleonSet.isEmpty()) {
0695       this.defaultAnnotations.removeAll(pleonSet);
0696     }
0697 
0698     //3.2 run quoted text transducer to generate "Pleonasm" annotations
0699     Benchmark.executeWithBenchmarking(pleonTransducer,
0700             Benchmark.createBenchmarkId("pleonTransducer",
0701                     getBenchmarkId()), this, null);
0702 
0703     //4.get all SENTENCE annotations
0704     AnnotationSet sentenceAnnotations = this.defaultAnnotations.get(SENTENCE_ANNOTATION_TYPE);
0705 
0706     this.textSentences = new Sentence[sentenceAnnotations.size()];
0707     Object[]  sentenceArray = sentenceAnnotations.toArray();
0708 
0709     java.util.Arrays.sort(sentenceArray,ANNOTATION_OFFSET_COMPARATOR);
0710 
0711     for (int i=0; i< sentenceArray.length; i++) {
0712 
0713       Annotation currSentence = (Annotation)sentenceArray[i];
0714       Long sentStartOffset = currSentence.getStartNode().getOffset();
0715       Long sentEndOffset = currSentence.getEndNode().getOffset();
0716       
0717       AnnotationSet tempASOffsets = this.defaultAnnotations.getContained(
0718               sentStartOffset,sentEndOffset);
0719 
0720       //4.1. get PERSONS in this sentence
0721       AnnotationSet sentPersons = tempASOffsets.get(PERSON_ANNOTATION_TYPE);
0722 
0723       //4.2. get inanimated entities (ORGANIZATIONS,LOCATION) in this sentence
0724      
0725       AnnotationSet sentInans = tempASOffsets.get(this.inanimatedSet);
0726 
0727       //4.5. create a Sentence for the SENTENCE annotation
0728       this.textSentences[inew Sentence(i,
0729                                             0,
0730                                             sentStartOffset,
0731                                             sentEndOffset,
0732                                             sentPersons,
0733                                             sentInans
0734                                   );
0735 
0736       //4.6. for all PERSONs in the sentence - find their gender using the
0737       //orthographic coreferences if the gender of some entity is unknown
0738       Iterator itPersons = sentPersons.iterator();
0739       while (itPersons.hasNext()) {
0740         Annotation currPerson = (Annotation)itPersons.next();
0741         String gender = this.findPersonGender(currPerson);
0742         this.personGender.put(currPerson,gender);
0743       }
0744     }
0745 
0746     //5. initialise the quoted text fragments
0747     AnnotationSet sentQuotes = this.defaultAnnotations.get(QUOTED_TEXT_TYPE);
0748 
0749     //if none then return
0750     if (null == sentQuotes) {
0751       this.quotedText = new Quote[0];
0752     }
0753     else {
0754       this.quotedText = new Quote[sentQuotes.size()];
0755 
0756       Object[] quotesArray = sentQuotes.toArray();
0757       java.util.Arrays.sort(quotesArray,ANNOTATION_OFFSET_COMPARATOR);
0758 
0759       for (int i =0; i < quotesArray.length; i++) {
0760         this.quotedText[inew Quote((Annotation)quotesArray[i],i);
0761       }
0762     }
0763 
0764     //6. initialuse the plonastic It annotations
0765     AnnotationSet plaonasticSet = this.defaultAnnotations.get(PLEONASTIC_TYPE);
0766 
0767     if (null == plaonasticSet) {
0768       this.pleonasticIt = new Annotation[0];
0769     }
0770     else {
0771       this.pleonasticIt = new Annotation[plaonasticSet.size()];
0772 
0773       Object[] quotesArray = plaonasticSet.toArray();
0774       java.util.Arrays.sort(quotesArray,ANNOTATION_OFFSET_COMPARATOR);
0775 
0776       for (int i=0; i< this.pleonasticIt.length; i++) {
0777         this.pleonasticIt[i(Annotation)quotesArray[i];
0778       }
0779     }
0780 
0781   }
0782 
0783 
0784   /** --- */
0785   private String findPersonGender(Annotation person) {
0786 
0787     String result = (String)person.getFeatures().get(PERSON_GENDER_FEATURE_NAME);
0788 
0789     if (null==result) {
0790       //gender is unknown - try to find it from the ortho coreferences
0791       List orthoMatches  = (List)person.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
0792 
0793       if (null != orthoMatches) {
0794         Iterator itMatches = orthoMatches.iterator();
0795 
0796         while (itMatches.hasNext()) {
0797           Integer correferringID = (Integer)itMatches.next();
0798           Annotation coreferringEntity = this.defaultAnnotations.get(correferringID);
0799           Assert.assertTrue(coreferringEntity.getType().equalsIgnoreCase(PERSON_ANNOTATION_TYPE));
0800           String correferringGender = (String)coreferringEntity.getFeatures().get(PERSON_GENDER_FEATURE_NAME);
0801 
0802           if (null != correferringGender) {
0803             result = correferringGender;
0804             break;
0805           }
0806         }
0807       }
0808     }
0809 
0810     return result;
0811   }
0812 
0813 
0814   /** --- */
0815   private static class AnnotationOffsetComparator implements Comparator {
0816 
0817     private int _getOffset(Object o) {
0818 
0819       if (instanceof Annotation) {
0820         return ((Annotation)o).getEndNode().getOffset().intValue();
0821       }
0822       else if (instanceof Sentence) {
0823         return ((Sentence)o).getStartOffset().intValue();
0824       }
0825       else if (instanceof Quote) {
0826         return ((Quote)o).getStartOffset().intValue();
0827       }
0828       else if (instanceof Node) {
0829         return ((Node)o).getOffset().intValue();
0830       }
0831       else {
0832         throw new IllegalArgumentException();
0833       }
0834     }
0835 
0836     public int compare(Object o1,Object o2) {
0837 
0838       //0. preconditions
0839       Assert.assertNotNull(o1);
0840       Assert.assertNotNull(o2);
0841       Assert.assertTrue(o1 instanceof Annotation ||
0842                         o1 instanceof Sentence ||
0843                         o1 instanceof Quote ||
0844                         o1 instanceof Node);
0845       Assert.assertTrue(o2 instanceof Annotation ||
0846                         o2 instanceof Sentence ||
0847                         o2 instanceof Quote ||
0848                         o2 instanceof Node);
0849 
0850       int offset1 = _getOffset(o1);
0851       int offset2 = _getOffset(o2);
0852 
0853       return offset1 - offset2;
0854     }
0855   }
0856 
0857 
0858   /** --- */
0859   private Annotation _chooseAntecedent$HE$HIM$HIS$SHE$HER$HIMSELF$(Annotation ant1, Annotation ant2, Annotation pronoun) {
0860 
0861     //0. preconditions
0862     Assert.assertNotNull(ant1);
0863     Assert.assertNotNull(ant2);
0864     Assert.assertNotNull(pronoun);
0865     Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY||
0866                       pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0867     String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0868     Assert.assertTrue(pronounString.equalsIgnoreCase("SHE"||
0869                       pronounString.equalsIgnoreCase("HER"||
0870                       pronounString.equalsIgnoreCase("HE"||
0871                       pronounString.equalsIgnoreCase("HIM"||
0872                       pronounString.equalsIgnoreCase("HIS"||
0873                       pronounString.equalsIgnoreCase("HIMSELF"));
0874 
0875     Long offset1 = ant1.getStartNode().getOffset();
0876     Long offset2 = ant2.getStartNode().getOffset();
0877     Long offsetPrn = pronoun.getStartNode().getOffset();
0878 
0879     long diff1 = offsetPrn.longValue() - offset1.longValue();
0880     long diff2 = offsetPrn.longValue() - offset2.longValue();
0881 //    Assert.assertTrue(diff1 != 0 && diff2 != 0);
0882     //reject candidates that overlap with the pronoun
0883     if (diff1 == 0) {
0884       return ant2;
0885     }
0886     else if (diff2 == 0) {
0887       return ant1;
0888     }
0889 
0890     //get the one CLOSEST AND PRECEDING the pronoun
0891     if (diff1 > && diff2 > 0) {
0892       //we have [...antecedentA...AntecedentB....pronoun...] ==> choose B
0893       if (diff1 < diff2)
0894         return ant1;
0895       else
0896         return ant2;
0897     }
0898     else if (diff1 < && diff2 < 0) {
0899       //we have [...pronoun ...antecedentA...AntecedentB.......] ==> choose A
0900       if (Math.abs(diff1< Math.abs(diff2))
0901         return ant1;
0902       else
0903           return ant2;
0904     }
0905     else {
0906       Assert.assertTrue(Math.abs(diff1 + diff2< Math.abs(diff1+ Math.abs(diff2));
0907       //we have [antecedentA...pronoun...AntecedentB] ==> choose A
0908       if (diff1 > 0)
0909         return ant1;
0910       else
0911         return ant2;
0912     }
0913   }
0914 
0915   /** --- */
0916   private Annotation _chooseAntecedent$IT$ITS$ITSELF$(Annotation ant1, Annotation ant2, Annotation pronoun) {
0917 
0918     //0. preconditions
0919     Assert.assertNotNull(ant1);
0920     Assert.assertNotNull(ant2);
0921     Assert.assertNotNull(pronoun);
0922     Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY||
0923                       pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0924     String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0925 
0926     Assert.assertTrue(pronounString.equalsIgnoreCase("IT"||
0927                       pronounString.equalsIgnoreCase("ITS"||
0928                       pronounString.equalsIgnoreCase("ITSELF"));
0929 
0930     Long offset1 = ant1.getStartNode().getOffset();
0931     Long offset2 = ant2.getStartNode().getOffset();
0932     Long offsetPrn = pronoun.getStartNode().getOffset();
0933     long diff1 = offsetPrn.longValue() - offset1.longValue();
0934     long diff2 = offsetPrn.longValue() - offset2.longValue();
0935 //    Assert.assertTrue(diff1 != 0 && diff2 != 0);
0936     //reject candidates that overlap with the pronoun
0937     if (diff1 == 0) {
0938       return ant2;
0939     }
0940     else if (diff2 == 0) {
0941       return ant1;
0942     }
0943 
0944 
0945     //get the one CLOSEST AND PRECEDING the pronoun
0946     if (diff1 > && diff2 > 0) {
0947       //we have [...antecedentA...AntecedentB....pronoun...] ==> choose B
0948       if (diff1 < diff2)
0949         return ant1;
0950       else
0951         return ant2;
0952     }
0953     else if (diff1 > 0){
0954       Assert.assertTrue(Math.abs(diff1 + diff2< Math.abs(diff1+ Math.abs(diff2));
0955       //we have [antecedentA...pronoun...AntecedentB] ==> choose A
0956       return ant1;
0957     }
0958     else if (diff2 > 0){
0959       Assert.assertTrue(Math.abs(diff1 + diff2< Math.abs(diff1+ Math.abs(diff2));
0960       //we have [antecedentA...pronoun...AntecedentB] ==> choose A
0961       return ant2;
0962     }
0963     else {
0964       //both possible antecedents are BEHIND the anaophoric pronoun - i.e. we have either
0965       //cataphora, or nominal antecedent, or an antecedent that is further back in scope
0966       //in any case - discard the antecedents
0967       return null;
0968     }
0969   }
0970 
0971 
0972   /** --- */
0973   private class Quote {
0974 
0975     /** --- */
0976     public static final int ANTEC_AFTER = 1;
0977     /** --- */
0978     public static final int ANTEC_BEFORE = 2;
0979     /** --- */
0980     public static final int ANTEC_BACK = 3;
0981     /** --- */
0982     private Set<Annotation> antecedentsBefore;
0983     /** --- */
0984     private Set<Annotation> antecedentsAfter;
0985     /** --- */
0986     private Set<Annotation> antecedentsBackInContext;
0987     /** --- */
0988     private Annotation quoteAnnotation;
0989     /** --- */
0990     private int quoteIndex;
0991 
0992     /** --- */
0993     public Quote(Annotation quoteAnnotation, int index) {
0994 
0995       this.quoteAnnotation = quoteAnnotation;
0996       this.quoteIndex = index;
0997       init();
0998     }
0999 
1000     /** --- */
1001     private void init() {
1002 
1003       //0.preconditions
1004       Assert.assertNotNull(textSentences);
1005 
1006       //0.5 create a restriction for PRP pos tokens
1007       FeatureMap prpTokenRestriction = new SimpleFeatureMapImpl();
1008       prpTokenRestriction.put(TOKEN_CATEGORY_FEATURE_NAME,PRP_CATEGORY);
1009 
1010       //1. generate the precPersons set
1011 
1012       //1.1 locate the sentece containing the opening quote marks
1013       int quoteStartPos = java.util.Arrays.binarySearch(textSentences,
1014                                                         this.quoteAnnotation.getStartNode(),
1015                                                         ANNOTATION_OFFSET_COMPARATOR);
1016 
1017       //normalize index
1018       int startSentenceIndex = quoteStartPos >= ? quoteStartPos
1019                                                   : -quoteStartPos --1// blame Sun, not me
1020       //still not good?
1021       if (startSentenceIndex < 0) {
1022         startSentenceIndex = 0;
1023       }
1024 
1025       //1.2. get the persons and restrict to these that precede the quote (i.e. not contained
1026       //in the quote)
1027       this.antecedentsBefore = generateAntecedentCandidates(startSentenceIndex,
1028                                                             this.quoteIndex,
1029                                                             ANTEC_BEFORE);
1030 
1031 
1032       //2. generate the precPersonsInCOntext set
1033       //2.1. get the persons from the sentence precedeing the sentence containing the quote start
1034       if (startSentenceIndex > 0) {
1035         this.antecedentsBackInContext = generateAntecedentCandidates(startSentenceIndex -1,
1036                                                                     this.quoteIndex,
1037                                                                     ANTEC_BACK);
1038       }
1039 
1040       //2. generate the succ  Persons set
1041       //2.1 locate the sentece containing the closing quote marks
1042       int quoteEndPos = java.util.Arrays.binarySearch(textSentences,
1043                                                         this.quoteAnnotation.getEndNode(),
1044                                                         ANNOTATION_OFFSET_COMPARATOR);
1045 
1046       //normalize it
1047       int endSentenceIndex = quoteEndPos >= ? quoteEndPos
1048                                               : -quoteEndPos --1// blame Sun, not me
1049       //still not good?
1050       if (endSentenceIndex < 0) {
1051         endSentenceIndex = 0;
1052       }
1053 
1054       this.antecedentsAfter = generateAntecedentCandidates(endSentenceIndex,
1055                                                             this.quoteIndex,
1056                                                             ANTEC_AFTER);
1057       //generate t
1058     }
1059 
1060 
1061     /** --- */
1062     private Set<Annotation> generateAntecedentCandidates(int sentenceNumber,
1063                                                         int quoteNumber ,
1064                                                         int mode) {
1065 
1066       //0. preconditions
1067       Assert.assertTrue(sentenceNumber >=0);
1068       Assert.assertTrue(quoteNumber >=0);
1069       Assert.assertTrue(mode == Quote.ANTEC_AFTER ||
1070                         mode == Quote.ANTEC_BEFORE ||
1071                         mode == Quote.ANTEC_BACK);
1072 
1073       //1. get sentence
1074      Sentence sentence = textSentences[sentenceNumber];
1075 
1076       //2. get the persons
1077       Set<Annotation> antecedents = new HashSet<Annotation>(sentence.getPersons());
1078 
1079       //4. now get the he/she pronouns in the relevant context
1080       AnnotationSet annotations = null;
1081 
1082       switch(mode) {
1083 
1084         case ANTEC_BEFORE:
1085           annotations = defaultAnnotations.getContained(sentence.getStartOffset(),
1086                                                       this.getStartOffset());
1087           break;
1088 
1089         case ANTEC_AFTER:
1090           annotations = defaultAnnotations.getContained(this.getEndOffset(),
1091                                                      sentence.getEndOffset());
1092           break;
1093 
1094         case ANTEC_BACK:
1095           annotations = defaultAnnotations.getContained(sentence.getStartOffset(),
1096                                                      sentence.getEndOffset());
1097           break;
1098       }
1099 
1100       //4. get the pronouns
1101       //restrict to he/she pronouns
1102       if (null != annotations) {
1103         AnnotationSet pronouns = annotations.get(TOKEN_ANNOTATION_TYPE,PRP_RESTRICTION);
1104 
1105         if (null != pronouns) {
1106 
1107           Iterator it = pronouns.iterator();
1108           while (it.hasNext()) {
1109             Annotation currPronoun = (Annotation)it.next();
1110             //add to succPersons only if HE/SHE
1111             String pronounString = (String)currPronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
1112 
1113             if (null != pronounString &&
1114                 (pronounString.equalsIgnoreCase("he"|| pronounString.equalsIgnoreCase("she"))
1115                 )
1116               antecedents.add(currPronoun);
1117           }//while
1118         }//if
1119       }//if
1120 
1121 
1122       //3. depending on the mode, may have to restrict persons to these that precede/succeed
1123       //the quoted fragment
1124       //
1125       //for ANTEC_BEFORE, get the ones #preceding# the quote, contained in the sentence where
1126       //the quote *starts*
1127       //
1128       //for ANTEC_AFTER, get the ones #succeeding# the quote, contained in the sentence where
1129       //the quote *ends*
1130       //
1131       //for ANTEC_BACK, we are operating in the context of the sentence previous to the
1132       //sentence where the quote starts. I.e. we're resolbinf a case like
1133       // [sss "q1q1q1q1" s1s1s1s1]["q2q2q2q2"]
1134       //...and we want to get the entities from the s1s1 part - they *succeed* the #previous# quote
1135       //Note that the cirrent sentence is the first one, not the second
1136       //
1137       Iterator itPersons = antecedents.iterator();
1138 
1139       while (itPersons.hasNext()) {
1140         Annotation currPerson = (Annotation)itPersons.next();
1141 
1142         //cut
1143         if (Quote.ANTEC_BEFORE == mode &&
1144             currPerson.getStartNode().getOffset().intValue() > getStartOffset().intValue()) {
1145           //restrict only to persosn preceding
1146           itPersons.remove();
1147         }
1148         else if (Quote.ANTEC_AFTER == mode &&
1149                 currPerson.getStartNode().getOffset().intValue() < getEndOffset().intValue()) {
1150           //restrict only to persons succeeding the quote
1151           itPersons.remove();
1152         }
1153         else if (Quote.ANTEC_BACK == mode) {
1154           //this one is tricky
1155           //locate the quote previous to the one we're resolving
1156           //(since we're operating in the sentence previous to the quote being resolved
1157           //wew try to find if any quote (prevQuote) exist in this sentence and get the
1158           //persons succeeding it)
1159 
1160           //get prev quote
1161           //is the curr quote the first one?
1162           if (quoteNumber >0) {
1163             Quote prevQuote = PronominalCoref.this.quotedText[quoteNumber-1];
1164 
1165             //restrict to the succeeding persons
1166             if (currPerson.getStartNode().getOffset().longValue() < prevQuote.getEndOffset().longValue()) {
1167               itPersons.remove();
1168             }
1169           }
1170         }
1171       }
1172 
1173       return antecedents;
1174     }
1175 
1176     /** --- */
1177     public Long getStartOffset() {
1178       return this.quoteAnnotation.getStartNode().getOffset();
1179     }
1180 
1181     /** --- */
1182     public Long getEndOffset() {
1183       return this.quoteAnnotation.getEndNode().getOffset();
1184     }
1185 
1186     /** --- */
1187     public Set<Annotation> getAntecedentCandidates(int type) {
1188 
1189       switch(type) {
1190 
1191         case ANTEC_AFTER:
1192           return null != this.antecedentsAfter ? 
1193                          this.antecedentsAfter : 
1194                          new HashSet<Annotation>();
1195 
1196         case ANTEC_BEFORE:
1197           return null != this.antecedentsBefore ? 
1198                          this.antecedentsBefore : 
1199                          new HashSet<Annotation>();
1200 
1201         case ANTEC_BACK:
1202           return null != this.antecedentsBackInContext ? 
1203                   this.antecedentsBackInContext : 
1204                   new HashSet<Annotation>();
1205 
1206         default:
1207           throw new IllegalArgumentException();
1208       }
1209     }
1210 
1211   }
1212 
1213 
1214   /** --- */
1215   private class Sentence {
1216 
1217     /** --- */
1218     private int sentNumber;
1219     /** --- */
1220     private int paraNumber;
1221     /** --- */
1222     private Long startOffset;
1223     /** --- */
1224     private Long endOffset;
1225     /** --- */
1226     private AnnotationSet persons;
1227     /** --- */
1228     private AnnotationSet inanimated;
1229 
1230     /** --- */
1231     public Sentence(int sentNumber,
1232                     int paraNumber,
1233                     Long startOffset,
1234                     Long endOffset,
1235                     AnnotationSet persons,
1236                     AnnotationSet inanimated) {
1237 
1238       this.sentNumber = sentNumber;
1239       this.paraNumber = paraNumber;
1240       this.startOffset = startOffset;
1241       this.endOffset = endOffset;
1242       this.persons = persons;
1243       this.inanimated = inanimated;
1244     }
1245 
1246     /** --- */
1247     public Long getStartOffset() {
1248       return this.startOffset;
1249     }
1250 
1251     /** --- */
1252     public Long getEndOffset() {
1253       return this.endOffset;
1254     }
1255 
1256     /** --- */
1257     public AnnotationSet getPersons() {
1258       return this.persons;
1259     }
1260 
1261     public AnnotationSet getInanimated() {
1262       return this.inanimated;
1263     }
1264     
1265   }
1266 
1267 
1268   public String getInanimatedEntityTypes() {
1269     return inanimatedEntityTypes;
1270   }
1271 
1272   public void setInanimatedEntityTypes(String inanimatedEntityTypes) {
1273     this.inanimatedEntityTypes = inanimatedEntityTypes;
1274   }
1275 
1276   /* (non-Javadoc)
1277    * @see gate.util.Benchmarkable#getBenchmarkId()
1278    */
1279   public String getBenchmarkId() {
1280     if(benchmarkId == null) {
1281       return getName();
1282     }
1283     else {
1284       return benchmarkId;
1285     }
1286   }
1287 
1288   /* (non-Javadoc)
1289    * @see gate.util.Benchmarkable#setBenchmarkId(java.lang.String)
1290    */
1291   public void setBenchmarkId(String benchmarkId) {
1292     this.benchmarkId = benchmarkId;
1293   }
1294 
1295 }