0001 /*
0002 * PronominalCoref.java
0003 *
0004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
0005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
0006 *
0007 * This file is part of GATE (see http://gate.ac.uk/), and is free
0008 * software, licenced under the GNU Library General Public License,
0009 * Version 2, June 1991 (in the distribution as file licence.html,
0010 * and also available at http://gate.ac.uk/gate/licence.html).
0011 *
0012 * Marin Dimitrov, 30/Dec/2001
0013 *
0014 * $Id: PronominalCoref.java 12006 2009-12-01 17:24:28Z thomas_heitz $
0015 */
0016
0017 package gate.creole.coref;
0018
0019 import java.net.MalformedURLException;
0020 import java.net.URL;
0021 import java.util.*;
0022
0023 import junit.framework.Assert;
0024
0025 import gate.*;
0026 import gate.annotation.AnnotationSetImpl;
0027 import gate.creole.*;
0028 import gate.util.*;
0029
0030 public class PronominalCoref extends AbstractLanguageAnalyser
0031 implements ProcessingResource, ANNIEConstants,
0032 Benchmarkable {
0033
/** Name of the parameter that carries the document to process. */
public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";

/** Name of the parameter that carries the annotation set name. */
public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";

/** When true, findAntecedent() reports pronouns it has no resolver for. */
private static final boolean DEBUG = false;

//JAPE grammars
// Locations of the quoted-text and pleonastic-it grammars; init() turns them
// into URLs and hands them to the two transducers.
private static final String QT_GRAMMAR_URL = Files.getGateResource(
"/creole/coref/quoted_text.jape").toString();
private static final String PLEON_GRAMMAR_URL = Files.getGateResource(
"/creole/coref/pleonasm.jape").toString();

//annotation types
// Types of the annotations that preprocess() removes, regenerates via the
// transducers and then reads back.
private static final String QUOTED_TEXT_TYPE = "QuotedText";
private static final String PLEONASTIC_TYPE = "PleonasticIt";

//annotation features
// Token category values used by execute() to select personal (PRP) and
// possessive (PRP$) pronouns.
private static final String PRP_CATEGORY = "PRP";
private static final String PRP$_CATEGORY = "PRP$";

//scope
// Number of sentences before the pronoun's own sentence that the he/she
// resolvers always search.
private static final int SENTENCES_IN_SCOPE = 3;
/** Shared comparator used for sorting and binary searches; assigned in the static initializer. */
private static AnnotationOffsetComparator ANNOTATION_OFFSET_COMPARATOR;
/** Name of the annotation set to work on; null or empty selects the document's default set. */
private String annotationSetName;
/** Transducer loaded with the quoted-text grammar. */
private Transducer qtTransducer;
/** Transducer loaded with the pleonastic-it grammar. */
private Transducer pleonTransducer;
/** The annotation set selected by preprocess() for the current run. */
private AnnotationSet defaultAnnotations;
/** Sentences of the current document, built and offset-sorted by preprocess(). */
private Sentence[] textSentences;
/** Quoted text fragments of the current document, built and offset-sorted by preprocess(). */
private Quote[] quotedText;
/** Pleonastic-it annotations of the current document, offset-sorted by preprocess(). */
private Annotation[] pleonasticIt;
/** Maps each Person annotation to the gender found for it (the value may be null). */
private HashMap personGender;
/** Maps each processed pronoun to its antecedent (null when none was found). */
private HashMap anaphor2antecedent;
/**
 * Feature map restricting Tokens to category PRP; filled in the static initializer.
 * NOTE(review): not referenced in the visible part of this file - confirm it is still used.
 */
private static final FeatureMap PRP_RESTRICTION;

// when false, it/its/itself pronouns are left unresolved
private boolean resolveIt = true;

/** Annotation types treated as inanimate entities; filled by preprocess() from inanimatedEntityTypes. */
private Set<String> inanimatedSet;

// ';'-separated list of inanimate entity types; preprocess() substitutes
// "Organization;Location" when it is null or empty
private String inanimatedEntityTypes;

// NOTE(review): presumably the value behind getBenchmarkId(), whose
// definition is not visible here - confirm
private String benchmarkId;
0088
// Builds the class-wide helpers once: the PRP token restriction and the
// shared offset comparator.
static {
  FeatureMap prpOnly = new SimpleFeatureMapImpl();
  prpOnly.put(TOKEN_CATEGORY_FEATURE_NAME, PRP_CATEGORY);
  PRP_RESTRICTION = prpOnly;

  ANNOTATION_OFFSET_COMPARATOR = new AnnotationOffsetComparator();
}
0095
/**
 * Creates a coreferencer with empty lookup tables and two fresh transducers.
 * The grammars are only loaded into the transducers by init().
 */
public PronominalCoref() {

  this.personGender = new HashMap();
  this.anaphor2antecedent = new HashMap();
  this.qtTransducer = new gate.creole.Transducer();
  this.pleonTransducer = new gate.creole.Transducer();
  // typed to match the Set<String> field instead of assigning a raw HashSet
  this.inanimatedSet = new HashSet<String>();
}
0105
0106 /** Initialise this resource, and return it. */
0107 public Resource init() throws ResourceInstantiationException {
0108
0109 //0. preconditions
0110 Assert.assertNotNull(this.qtTransducer);
0111
0112 //1. initialise quoted text transducer
0113 URL qtGrammarURL = null;
0114 try {
0115 qtGrammarURL = new URL(QT_GRAMMAR_URL);
0116 }
0117 catch(MalformedURLException mue) {
0118 throw new ResourceInstantiationException(mue);
0119 }
0120 this.qtTransducer.setGrammarURL(qtGrammarURL);
0121 this.qtTransducer.setEncoding("UTF-8");
0122 this.qtTransducer.init();
0123
0124 //2. initialise pleonastic transducer
0125 URL pleonGrammarURL = null;
0126 try {
0127 pleonGrammarURL = new URL(PLEON_GRAMMAR_URL);
0128 }
0129 catch(MalformedURLException mue) {
0130 throw new ResourceInstantiationException(mue);
0131 }
0132 this.pleonTransducer.setGrammarURL(pleonGrammarURL);
0133 this.pleonTransducer.setEncoding("UTF-8");
0134 this.pleonTransducer.init();
0135
0136 //3. delegate
0137 return super.init();
0138 } // init()
0139
0140 /**
0141 * Reinitialises the processing resource. After calling this method the
0142 * resource should be in the state it is after calling init.
0143 * If the resource depends on external resources (such as rules files) then
0144 * the resource will re-read those resources. If the data used to create
0145 * the resource has changed since the resource has been created then the
0146 * resource will change too after calling reInit().
0147 */
0148 public void reInit() throws ResourceInstantiationException {
0149
0150 if (null != this.qtTransducer) {
0151 this.qtTransducer.reInit();
0152 }
0153
0154 if (null != this.pleonTransducer) {
0155 this.pleonTransducer.reInit();
0156 }
0157
0158 init();
0159 } // reInit()
0160
0161
0162 /** Set the document to run on. */
0163 public void setDocument(Document newDocument) {
0164
0165 //0. precondition
0166 // Assert.assertNotNull(newDocument);
0167
0168 //1. set doc for aggregated components
0169 this.qtTransducer.setDocument(newDocument);
0170 this.pleonTransducer.setDocument(newDocument);
0171
0172 //3. delegate
0173 super.setDocument(newDocument);
0174 }
0175
/**
 * Sets the name of the annotation set to work on. A null or empty name
 * makes preprocess() use the document's default annotation set.
 *
 * @param annotationSetName the annotation set name, may be null
 */
public void setAnnotationSetName(String annotationSetName) {
this.annotationSetName = annotationSetName;
}
0180
0181
0182 /** --- */
0183 public String getAnnotationSetName() {
0184 return annotationSetName;
0185 }
0186
0187 /** --- */
0188 public void setResolveIt(Boolean newValue) {
0189 this.resolveIt = newValue.booleanValue();
0190 }
0191
0192 /** --- */
0193 public Boolean getResolveIt() {
0194 return new Boolean(this.resolveIt);
0195 }
0196
0197
0198 /**
0199 * This method runs the coreferencer. It assumes that all the needed parameters
0200 * are set. If they are not, an exception will be fired.
0201 */
0202 public void execute() throws ExecutionException{
0203
0204 //0. preconditions
0205 if(null == this.document) {
0206 throw new ExecutionException("[coreference] Document is not set!");
0207 }
0208
0209 //1. preprocess
0210 preprocess();
0211 /*
0212 //2. remove corefs from previous run
0213 String annSetName = this.annotationSetName == null ? "COREF"
0214 : this.annotationSetName;
0215
0216 AnnotationSet corefSet = this.document.getAnnotations(annSetName);
0217 if (false == corefSet.isEmpty()) {
0218 corefSet.clear();
0219 }
0220 */
0221 //3.get personal pronouns
0222 FeatureMap constraintPRP = new SimpleFeatureMapImpl();
0223 constraintPRP.put(TOKEN_CATEGORY_FEATURE_NAME,PRP_CATEGORY);
0224 AnnotationSet personalPronouns = this.defaultAnnotations.get(TOKEN_ANNOTATION_TYPE,constraintPRP);
0225
0226 //4.get possesive pronouns
0227 FeatureMap constraintPRP$ = new SimpleFeatureMapImpl();
0228 constraintPRP$.put(TOKEN_CATEGORY_FEATURE_NAME,PRP$_CATEGORY);
0229 AnnotationSet possesivePronouns = this.defaultAnnotations.get(TOKEN_ANNOTATION_TYPE,constraintPRP$);
0230
0231 //5.combine them
0232 List pronouns = new ArrayList();
0233 if (personalPronouns != null && !personalPronouns.isEmpty()) {
0234 pronouns.addAll(personalPronouns);
0235 }
0236
0237 if (possesivePronouns != null && !possesivePronouns.isEmpty()) {
0238 pronouns.addAll(possesivePronouns);
0239 }
0240
0241 //6.do we have pronouns at all?
0242 if (pronouns.isEmpty()) {
0243 //do nothing
0244 return;
0245 }
0246
0247 //7.sort them according to offset
0248 Object[] arrPronouns = pronouns.toArray();
0249 java.util.Arrays.sort(arrPronouns,ANNOTATION_OFFSET_COMPARATOR);
0250
0251 //8.cleanup - ease the GC
0252 pronouns = null;
0253 personalPronouns = null;
0254 possesivePronouns = null;
0255
0256 int prnSentIndex = 0;
0257
0258
0259 //10. process all pronouns
0260 for (int i=0; i< arrPronouns.length; i++) {
0261 Annotation currPronoun = (Annotation)arrPronouns[i];
0262 while (this.textSentences[prnSentIndex].getEndOffset().longValue() <
0263 currPronoun.getEndNode().getOffset().longValue()) {
0264 prnSentIndex++;
0265 }
0266
0267 Sentence currSentence = this.textSentences[prnSentIndex];
0268 Assert.assertTrue(currSentence.getStartOffset().longValue() <= currPronoun.getStartNode().getOffset().longValue());
0269 Assert.assertTrue(currSentence.getEndOffset().longValue() >= currPronoun.getEndNode().getOffset().longValue());
0270
0271 //11. find antecedent (if any) for pronoun
0272 Annotation antc = findAntecedent(currPronoun,prnSentIndex);
0273
0274 //12. add to the ana2ant hashtable
0275 this.anaphor2antecedent.put(currPronoun,antc);
0276 }
0277
0278 //done
0279 }
0280
0281
0282 /** --- */
0283 public HashMap getResolvedAnaphora() {
0284 return this.anaphor2antecedent;
0285 }
0286
0287 /** --- */
0288 private Annotation findAntecedent(Annotation currPronoun,int prnSentIndex) {
0289
0290 //0. preconditions
0291 Assert.assertNotNull(currPronoun);
0292 Assert.assertTrue(prnSentIndex >= 0);
0293 Assert.assertTrue(currPronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0294 Assert.assertTrue(currPronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY) ||
0295 currPronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0296
0297 //1.
0298 String strPronoun = (String)currPronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0299
0300 Assert.assertNotNull(strPronoun);
0301
0302 //2. delegate processing to the appropriate methods
0303 if (strPronoun.equalsIgnoreCase("HE") ||
0304 strPronoun.equalsIgnoreCase("HIM") ||
0305 strPronoun.equalsIgnoreCase("HIS") ||
0306 strPronoun.equalsIgnoreCase("HIMSELF")) {
0307 return _resolve$HE$HIM$HIS$HIMSELF$(currPronoun,prnSentIndex);
0308 }
0309 else if (strPronoun.equalsIgnoreCase("SHE") ||
0310 strPronoun.equalsIgnoreCase("HER")) {
0311 return _resolve$SHE$HER$(currPronoun,prnSentIndex);
0312 }
0313 else if (strPronoun.equalsIgnoreCase("IT") ||
0314 strPronoun.equalsIgnoreCase("ITS") ||
0315 strPronoun.equalsIgnoreCase("ITSELF")) {
0316 return _resolve$IT$ITS$ITSELF$(currPronoun,prnSentIndex);
0317 }
0318 else if (strPronoun.equalsIgnoreCase("I") ||
0319 strPronoun.equalsIgnoreCase("ME") ||
0320 strPronoun.equalsIgnoreCase("MY") ||
0321 strPronoun.equalsIgnoreCase("MYSELF")) {
0322 return _resolve$I$ME$MY$MYSELF$(currPronoun,prnSentIndex);
0323 }
0324 else {
0325 if (DEBUG) {
0326 gate.util.Err.println("["+strPronoun+"] is not handled yet...");
0327 }
0328 return null;
0329 }
0330 }
0331
0332
0333 boolean isPleonastic(Annotation pronoun) {
0334
0335 //0. preconditions
0336 Assert.assertNotNull(pronoun);
0337 String str = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0338 Assert.assertTrue(str.equalsIgnoreCase("IT"));
0339
0340 //1. do we have pleonasms in this text?
0341 if (this.pleonasticIt.length == 0) {
0342 return false;
0343 }
0344
0345 //2. find closest pleonasm index
0346 int closestPleonasmIndex = java.util.Arrays.binarySearch(this.pleonasticIt,
0347 pronoun,
0348 ANNOTATION_OFFSET_COMPARATOR);
0349 //normalize index
0350 if (closestPleonasmIndex < 0) {
0351 closestPleonasmIndex = -closestPleonasmIndex -1 -1;
0352 }
0353
0354 //still not good?
0355 if (closestPleonasmIndex < 0) {
0356 closestPleonasmIndex = 0;
0357 }
0358
0359 //get closest pleonasm
0360 Annotation pleonasm = this.pleonasticIt[closestPleonasmIndex];
0361
0362 //System.out.println(pleonasm);
0363 //System.out.println(pronoun);
0364
0365 //3. return true only if the proboun is contained in pleonastic fragment
0366 boolean result = (pleonasm.getStartNode().getOffset().intValue() <= pronoun.getStartNode().getOffset().intValue()
0367 &&
0368 pleonasm.getEndNode().getOffset().intValue() >= pronoun.getEndNode().getOffset().intValue());
0369 //System.out.println("is pleon=["+result+"]");
0370 return result;
0371 }
0372
0373
0374 /** --- */
0375 private Annotation _resolve$HE$HIM$HIS$HIMSELF$(Annotation pronoun, int sentenceIndex) {
0376
0377 //0. preconditions
0378 Assert.assertTrue(pronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0379 Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY) ||
0380 pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0381 String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0382 Assert.assertTrue(pronounString.equalsIgnoreCase("HE") ||
0383 pronounString.equalsIgnoreCase("HIM") ||
0384 pronounString.equalsIgnoreCase("HIS") ||
0385 pronounString.equalsIgnoreCase("HIMSELF"));
0386
0387 //1.
0388 boolean antecedentFound = false;
0389 int scopeFirstIndex = sentenceIndex - SENTENCES_IN_SCOPE;
0390 if (scopeFirstIndex < 0 ) scopeFirstIndex = 0;
0391
0392 int currSentenceIndex = sentenceIndex;
0393 Annotation bestAntecedent = null;
0394
0395 while (currSentenceIndex >= scopeFirstIndex || antecedentFound == false) {
0396 Sentence currSentence = this.textSentences[currSentenceIndex];
0397 AnnotationSet persons = currSentence.getPersons();
0398
0399 Iterator it = persons.iterator();
0400 while (it.hasNext()) {
0401 Annotation currPerson = (Annotation)it.next();
0402 String gender = (String)this.personGender.get(currPerson);
0403
0404 if (null == gender ||
0405 gender.equalsIgnoreCase("MALE") ||
0406 gender.equalsIgnoreCase("UNKNOWN")) {
0407 //hit
0408 antecedentFound = true;
0409
0410 if (null == bestAntecedent) {
0411 bestAntecedent = currPerson;
0412 }
0413 else {
0414 bestAntecedent = _chooseAntecedent$HE$HIM$HIS$SHE$HER$HIMSELF$(bestAntecedent,currPerson,pronoun);
0415 }
0416 }
0417 }
0418
0419 if (0 == currSentenceIndex--)
0420 break;
0421
0422 }
0423
0424 return bestAntecedent;
0425 }
0426
0427
0428 /** --- */
0429 private Annotation _resolve$SHE$HER$(Annotation pronoun, int sentenceIndex) {
0430
0431 //0. preconditions
0432 Assert.assertTrue(pronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0433 Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY) ||
0434 pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0435 String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0436 Assert.assertTrue(pronounString.equalsIgnoreCase("SHE") ||
0437 pronounString.equalsIgnoreCase("HER"));
0438
0439 //1.
0440 boolean antecedentFound = false;
0441 int scopeFirstIndex = sentenceIndex - SENTENCES_IN_SCOPE;
0442 if (scopeFirstIndex < 0 ) scopeFirstIndex = 0;
0443 int currSentenceIndex = sentenceIndex;
0444 Annotation bestAntecedent = null;
0445
0446 while (currSentenceIndex >= scopeFirstIndex || antecedentFound == false) {
0447 Sentence currSentence = this.textSentences[currSentenceIndex];
0448 AnnotationSet persons = currSentence.getPersons();
0449
0450 Iterator it = persons.iterator();
0451 while (it.hasNext()) {
0452 Annotation currPerson = (Annotation)it.next();
0453 String gender = (String)this.personGender.get(currPerson);
0454
0455 if (null == gender ||
0456 gender.equalsIgnoreCase("FEMALE") ||
0457 gender.equalsIgnoreCase("UNKNOWN")) {
0458 //hit
0459 antecedentFound = true;
0460
0461 if (null == bestAntecedent) {
0462 bestAntecedent = currPerson;
0463 }
0464 else {
0465 bestAntecedent = _chooseAntecedent$HE$HIM$HIS$SHE$HER$HIMSELF$(bestAntecedent,currPerson,pronoun);
0466 }
0467 }
0468 }
0469
0470 if (0 == currSentenceIndex--)
0471 break;
0472 }
0473
0474 return bestAntecedent;
0475 }
0476
0477
0478 /** --- */
0479 private Annotation _resolve$IT$ITS$ITSELF$(Annotation pronoun, int sentenceIndex) {
0480 //do not resolve it pronouns if disabled by the user
0481 if (! resolveIt)
0482 return null;
0483
0484 //0. preconditions
0485 Assert.assertTrue(pronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0486 Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY) ||
0487 pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0488 String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0489 Assert.assertTrue(pronounString.equalsIgnoreCase("IT") ||
0490 pronounString.equalsIgnoreCase("ITS") ||
0491 pronounString.equalsIgnoreCase("ITSELF"));
0492
0493 //0.5 check if the IT is pleonastic
0494 if (pronounString.equalsIgnoreCase("IT") &&
0495 isPleonastic(pronoun)) {
0496 //System.out.println("PLEONASM...");
0497 return null;
0498 }
0499
0500 //1.
0501 int scopeFirstIndex = sentenceIndex - 1;
0502 if (scopeFirstIndex < 0 ) scopeFirstIndex = 0;
0503
0504 int currSentenceIndex = sentenceIndex;
0505 Annotation bestAntecedent = null;
0506
0507 while (currSentenceIndex >= scopeFirstIndex) {
0508
0509 Sentence currSentence = this.textSentences[currSentenceIndex];
0510 Set<Annotation> org_loc = currSentence.getInanimated();
0511
0512 Iterator it = org_loc.iterator();
0513 while (it.hasNext()) {
0514 Annotation currOrgLoc = (Annotation)it.next();
0515
0516 if (null == bestAntecedent) {
0517 //discard cataphoric references
0518 if (currOrgLoc.getStartNode().getOffset().longValue() <
0519 pronoun.getStartNode().getOffset().longValue()) {
0520 bestAntecedent = currOrgLoc;
0521 }
0522 }
0523 else {
0524 bestAntecedent = this._chooseAntecedent$IT$ITS$ITSELF$(bestAntecedent,currOrgLoc,pronoun);
0525 }
0526 }
0527
0528 if (0 == currSentenceIndex--)
0529 break;
0530 }
0531
0532 return bestAntecedent;
0533 }
0534
0535
0536 /** --- */
0537 private Annotation _resolve$I$ME$MY$MYSELF$(Annotation pronoun, int sentenceIndex) {
0538
0539 //0. preconditions
0540 Assert.assertTrue(pronoun.getType().equals(TOKEN_ANNOTATION_TYPE));
0541 Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY) ||
0542 pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0543 String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0544 Assert.assertTrue(pronounString.equalsIgnoreCase("I") ||
0545 pronounString.equalsIgnoreCase("MY") ||
0546 pronounString.equalsIgnoreCase("ME") ||
0547 pronounString.equalsIgnoreCase("MYSELF"));
0548
0549 //0.5 sanity check
0550 //if there are not quotes at all in the text then exit
0551 if (0 == this.quotedText.length) {
0552 //System.out.println("TEXT WITH NO QUOTES ENCOUNTERED...");
0553 return null;
0554 }
0555
0556
0557 //1.
0558 Annotation bestAntecedent = null;
0559
0560 int closestQuoteIndex = java.util.Arrays.binarySearch(this.quotedText,pronoun,ANNOTATION_OFFSET_COMPARATOR);
0561 //normalize index
0562 if (closestQuoteIndex < 0) {
0563 closestQuoteIndex = -closestQuoteIndex -1 -1;
0564 }
0565
0566 //still not good?
0567 if (closestQuoteIndex < 0) {
0568 closestQuoteIndex = 0;
0569 }
0570
0571 //get closest Quote
0572 Quote quoteContext = this.quotedText[closestQuoteIndex];
0573
0574 //assure that the pronoun is contained in the quoted text fragment
0575 //otherwise exit
0576
0577 if (pronoun.getStartNode().getOffset().intValue() > quoteContext.getEndOffset().intValue() ||
0578 pronoun.getEndNode().getOffset().intValue() < quoteContext.getStartOffset().intValue()) {
0579 //oops, probably incorrect text - I/My/Me is not part of quoted text fragment
0580 //exit
0581 //System.out.println("Oops! ["+pronounString+"] not part of quoted fragment...");
0582 return null;
0583 }
0584
0585 //get the Persons that precede/succeed the quoted fragment
0586 //the order is:
0587 //
0588 //[1]. if there exists a Person or pronoun in {he, she} following the quoted fragment but
0589 //in the same sentence, then use it
0590 //i.e. ["PRN1(x)...", said X ...A, B, C ....]
0591 //
0592 //[2]. if there is a Person (NOT a pronoun) in the same sentence,
0593 // preceding the quote, then use it
0594 //i.e. . [A, B, C...X ..."PRN1(x) ..."...]
0595 //
0596
0597 //try [1]
0598 //get the succeeding Persons/pronouns
0599 Set<Annotation> succCandidates = quoteContext.getAntecedentCandidates(Quote.ANTEC_AFTER);
0600 if (false == succCandidates.isEmpty()) {
0601 //cool, we have candidates, pick up the one closest to the end quote
0602 Iterator it = succCandidates.iterator();
0603
0604 while (it.hasNext()) {
0605 Annotation currCandidate = (Annotation)it.next();
0606 if (null == bestAntecedent || ANNOTATION_OFFSET_COMPARATOR.compare(bestAntecedent,currCandidate) > 0) {
0607 //wow, we have a candidate that is closer to the quote
0608 bestAntecedent = currCandidate;
0609 }
0610 }
0611 }
0612
0613 //try [2]
0614 //get the preceding Persons/pronouns
0615 if (null == bestAntecedent) {
0616 Set<Annotation> precCandidates = quoteContext.getAntecedentCandidates(Quote.ANTEC_BEFORE);
0617 if (false == precCandidates.isEmpty()) {
0618 //cool, we have candidates, pick up the one closest to the end quote
0619 Iterator it = precCandidates.iterator();
0620
0621 while (it.hasNext()) {
0622 Annotation currCandidate = (Annotation)it.next();
0623 if (null == bestAntecedent || ANNOTATION_OFFSET_COMPARATOR.compare(bestAntecedent,currCandidate) < 0) {
0624 //wow, we have a candidate that is closer to the quote
0625 bestAntecedent = currCandidate;
0626 }
0627 }
0628 }
0629 }
0630
0631 //try [3]
0632 //get the Persons/pronouns back in context
0633 if (null == bestAntecedent) {
0634 Set<Annotation> precCandidates = quoteContext.getAntecedentCandidates(Quote.ANTEC_BACK);
0635 if (false == precCandidates.isEmpty()) {
0636 //cool, we have candidates, pick up the one closest to the end quote
0637 Iterator it = precCandidates.iterator();
0638
0639 while (it.hasNext()) {
0640 Annotation currCandidate = (Annotation)it.next();
0641 if (null == bestAntecedent || ANNOTATION_OFFSET_COMPARATOR.compare(bestAntecedent,currCandidate) > 0) {
0642 //wow, we have a candidate that is closer to the quote
0643 bestAntecedent = currCandidate;
0644 }
0645 }
0646 }
0647 }
0648
0649 return bestAntecedent;
0650 }
0651
0652
0653 /** --- */
0654 private void preprocess() throws ExecutionException {
0655
0656 //0.5 cleanup
0657 this.personGender.clear();
0658 this.anaphor2antecedent.clear();
0659
0660 //1.get all annotation in the input set
0661 if ( this.annotationSetName == null || this.annotationSetName.equals("")) {
0662 this.defaultAnnotations = this.document.getAnnotations();
0663 }
0664 else {
0665 this.defaultAnnotations = this.document.getAnnotations(annotationSetName);
0666 }
0667
0668 //if none found, print warning and exit
0669 if (this.defaultAnnotations == null || this.defaultAnnotations.isEmpty()) {
0670 Err.prln("Coref Warning: No annotations found for processing!");
0671 return;
0672 }
0673
0674 // get the list of inanimated entity types
0675 if (inanimatedEntityTypes==null||inanimatedEntityTypes.equals(""))
0676 inanimatedEntityTypes="Organization;Location";
0677
0678 String[] types = inanimatedEntityTypes.split(";");
0679 this.inanimatedSet.addAll(Arrays.asList(types));
0680
0681 //2.1 remove QT annotations if left from previous execution
0682 AnnotationSet qtSet = this.defaultAnnotations.get(QUOTED_TEXT_TYPE);
0683 if (qtSet != null && !qtSet.isEmpty()) {
0684 this.defaultAnnotations.removeAll(qtSet);
0685 }
0686
0687 //2.2. run quoted text transducer to generate "Quoted Text" annotations
0688 Benchmark.executeWithBenchmarking(this.qtTransducer,
0689 Benchmark.createBenchmarkId("qtTransducer",
0690 getBenchmarkId()), this, null);
0691
0692 //3.1 remove pleonastic annotations if left from previous execution
0693 AnnotationSet pleonSet = this.defaultAnnotations.get(PLEONASTIC_TYPE);
0694 if (pleonSet != null && !pleonSet.isEmpty()) {
0695 this.defaultAnnotations.removeAll(pleonSet);
0696 }
0697
0698 //3.2 run quoted text transducer to generate "Pleonasm" annotations
0699 Benchmark.executeWithBenchmarking(pleonTransducer,
0700 Benchmark.createBenchmarkId("pleonTransducer",
0701 getBenchmarkId()), this, null);
0702
0703 //4.get all SENTENCE annotations
0704 AnnotationSet sentenceAnnotations = this.defaultAnnotations.get(SENTENCE_ANNOTATION_TYPE);
0705
0706 this.textSentences = new Sentence[sentenceAnnotations.size()];
0707 Object[] sentenceArray = sentenceAnnotations.toArray();
0708
0709 java.util.Arrays.sort(sentenceArray,ANNOTATION_OFFSET_COMPARATOR);
0710
0711 for (int i=0; i< sentenceArray.length; i++) {
0712
0713 Annotation currSentence = (Annotation)sentenceArray[i];
0714 Long sentStartOffset = currSentence.getStartNode().getOffset();
0715 Long sentEndOffset = currSentence.getEndNode().getOffset();
0716
0717 AnnotationSet tempASOffsets = this.defaultAnnotations.getContained(
0718 sentStartOffset,sentEndOffset);
0719
0720 //4.1. get PERSONS in this sentence
0721 AnnotationSet sentPersons = tempASOffsets.get(PERSON_ANNOTATION_TYPE);
0722
0723 //4.2. get inanimated entities (ORGANIZATIONS,LOCATION) in this sentence
0724
0725 AnnotationSet sentInans = tempASOffsets.get(this.inanimatedSet);
0726
0727 //4.5. create a Sentence for the SENTENCE annotation
0728 this.textSentences[i] = new Sentence(i,
0729 0,
0730 sentStartOffset,
0731 sentEndOffset,
0732 sentPersons,
0733 sentInans
0734 );
0735
0736 //4.6. for all PERSONs in the sentence - find their gender using the
0737 //orthographic coreferences if the gender of some entity is unknown
0738 Iterator itPersons = sentPersons.iterator();
0739 while (itPersons.hasNext()) {
0740 Annotation currPerson = (Annotation)itPersons.next();
0741 String gender = this.findPersonGender(currPerson);
0742 this.personGender.put(currPerson,gender);
0743 }
0744 }
0745
0746 //5. initialise the quoted text fragments
0747 AnnotationSet sentQuotes = this.defaultAnnotations.get(QUOTED_TEXT_TYPE);
0748
0749 //if none then return
0750 if (null == sentQuotes) {
0751 this.quotedText = new Quote[0];
0752 }
0753 else {
0754 this.quotedText = new Quote[sentQuotes.size()];
0755
0756 Object[] quotesArray = sentQuotes.toArray();
0757 java.util.Arrays.sort(quotesArray,ANNOTATION_OFFSET_COMPARATOR);
0758
0759 for (int i =0; i < quotesArray.length; i++) {
0760 this.quotedText[i] = new Quote((Annotation)quotesArray[i],i);
0761 }
0762 }
0763
0764 //6. initialuse the plonastic It annotations
0765 AnnotationSet plaonasticSet = this.defaultAnnotations.get(PLEONASTIC_TYPE);
0766
0767 if (null == plaonasticSet) {
0768 this.pleonasticIt = new Annotation[0];
0769 }
0770 else {
0771 this.pleonasticIt = new Annotation[plaonasticSet.size()];
0772
0773 Object[] quotesArray = plaonasticSet.toArray();
0774 java.util.Arrays.sort(quotesArray,ANNOTATION_OFFSET_COMPARATOR);
0775
0776 for (int i=0; i< this.pleonasticIt.length; i++) {
0777 this.pleonasticIt[i] = (Annotation)quotesArray[i];
0778 }
0779 }
0780
0781 }
0782
0783
0784 /** --- */
0785 private String findPersonGender(Annotation person) {
0786
0787 String result = (String)person.getFeatures().get(PERSON_GENDER_FEATURE_NAME);
0788
0789 if (null==result) {
0790 //gender is unknown - try to find it from the ortho coreferences
0791 List orthoMatches = (List)person.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
0792
0793 if (null != orthoMatches) {
0794 Iterator itMatches = orthoMatches.iterator();
0795
0796 while (itMatches.hasNext()) {
0797 Integer correferringID = (Integer)itMatches.next();
0798 Annotation coreferringEntity = this.defaultAnnotations.get(correferringID);
0799 Assert.assertTrue(coreferringEntity.getType().equalsIgnoreCase(PERSON_ANNOTATION_TYPE));
0800 String correferringGender = (String)coreferringEntity.getFeatures().get(PERSON_GENDER_FEATURE_NAME);
0801
0802 if (null != correferringGender) {
0803 result = correferringGender;
0804 break;
0805 }
0806 }
0807 }
0808 }
0809
0810 return result;
0811 }
0812
0813
0814 /** --- */
0815 private static class AnnotationOffsetComparator implements Comparator {
0816
0817 private int _getOffset(Object o) {
0818
0819 if (o instanceof Annotation) {
0820 return ((Annotation)o).getEndNode().getOffset().intValue();
0821 }
0822 else if (o instanceof Sentence) {
0823 return ((Sentence)o).getStartOffset().intValue();
0824 }
0825 else if (o instanceof Quote) {
0826 return ((Quote)o).getStartOffset().intValue();
0827 }
0828 else if (o instanceof Node) {
0829 return ((Node)o).getOffset().intValue();
0830 }
0831 else {
0832 throw new IllegalArgumentException();
0833 }
0834 }
0835
0836 public int compare(Object o1,Object o2) {
0837
0838 //0. preconditions
0839 Assert.assertNotNull(o1);
0840 Assert.assertNotNull(o2);
0841 Assert.assertTrue(o1 instanceof Annotation ||
0842 o1 instanceof Sentence ||
0843 o1 instanceof Quote ||
0844 o1 instanceof Node);
0845 Assert.assertTrue(o2 instanceof Annotation ||
0846 o2 instanceof Sentence ||
0847 o2 instanceof Quote ||
0848 o2 instanceof Node);
0849
0850 int offset1 = _getOffset(o1);
0851 int offset2 = _getOffset(o2);
0852
0853 return offset1 - offset2;
0854 }
0855 }
0856
0857
0858 /** --- */
0859 private Annotation _chooseAntecedent$HE$HIM$HIS$SHE$HER$HIMSELF$(Annotation ant1, Annotation ant2, Annotation pronoun) {
0860
0861 //0. preconditions
0862 Assert.assertNotNull(ant1);
0863 Assert.assertNotNull(ant2);
0864 Assert.assertNotNull(pronoun);
0865 Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY) ||
0866 pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0867 String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0868 Assert.assertTrue(pronounString.equalsIgnoreCase("SHE") ||
0869 pronounString.equalsIgnoreCase("HER") ||
0870 pronounString.equalsIgnoreCase("HE") ||
0871 pronounString.equalsIgnoreCase("HIM") ||
0872 pronounString.equalsIgnoreCase("HIS") ||
0873 pronounString.equalsIgnoreCase("HIMSELF"));
0874
0875 Long offset1 = ant1.getStartNode().getOffset();
0876 Long offset2 = ant2.getStartNode().getOffset();
0877 Long offsetPrn = pronoun.getStartNode().getOffset();
0878
0879 long diff1 = offsetPrn.longValue() - offset1.longValue();
0880 long diff2 = offsetPrn.longValue() - offset2.longValue();
0881 // Assert.assertTrue(diff1 != 0 && diff2 != 0);
0882 //reject candidates that overlap with the pronoun
0883 if (diff1 == 0) {
0884 return ant2;
0885 }
0886 else if (diff2 == 0) {
0887 return ant1;
0888 }
0889
0890 //get the one CLOSEST AND PRECEDING the pronoun
0891 if (diff1 > 0 && diff2 > 0) {
0892 //we have [...antecedentA...AntecedentB....pronoun...] ==> choose B
0893 if (diff1 < diff2)
0894 return ant1;
0895 else
0896 return ant2;
0897 }
0898 else if (diff1 < 0 && diff2 < 0) {
0899 //we have [...pronoun ...antecedentA...AntecedentB.......] ==> choose A
0900 if (Math.abs(diff1) < Math.abs(diff2))
0901 return ant1;
0902 else
0903 return ant2;
0904 }
0905 else {
0906 Assert.assertTrue(Math.abs(diff1 + diff2) < Math.abs(diff1) + Math.abs(diff2));
0907 //we have [antecedentA...pronoun...AntecedentB] ==> choose A
0908 if (diff1 > 0)
0909 return ant1;
0910 else
0911 return ant2;
0912 }
0913 }
0914
0915 /** --- */
0916 private Annotation _chooseAntecedent$IT$ITS$ITSELF$(Annotation ant1, Annotation ant2, Annotation pronoun) {
0917
0918 //0. preconditions
0919 Assert.assertNotNull(ant1);
0920 Assert.assertNotNull(ant2);
0921 Assert.assertNotNull(pronoun);
0922 Assert.assertTrue(pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP_CATEGORY) ||
0923 pronoun.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME).equals(PRP$_CATEGORY));
0924 String pronounString = (String)pronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
0925
0926 Assert.assertTrue(pronounString.equalsIgnoreCase("IT") ||
0927 pronounString.equalsIgnoreCase("ITS") ||
0928 pronounString.equalsIgnoreCase("ITSELF"));
0929
0930 Long offset1 = ant1.getStartNode().getOffset();
0931 Long offset2 = ant2.getStartNode().getOffset();
0932 Long offsetPrn = pronoun.getStartNode().getOffset();
0933 long diff1 = offsetPrn.longValue() - offset1.longValue();
0934 long diff2 = offsetPrn.longValue() - offset2.longValue();
0935 // Assert.assertTrue(diff1 != 0 && diff2 != 0);
0936 //reject candidates that overlap with the pronoun
0937 if (diff1 == 0) {
0938 return ant2;
0939 }
0940 else if (diff2 == 0) {
0941 return ant1;
0942 }
0943
0944
0945 //get the one CLOSEST AND PRECEDING the pronoun
0946 if (diff1 > 0 && diff2 > 0) {
0947 //we have [...antecedentA...AntecedentB....pronoun...] ==> choose B
0948 if (diff1 < diff2)
0949 return ant1;
0950 else
0951 return ant2;
0952 }
0953 else if (diff1 > 0){
0954 Assert.assertTrue(Math.abs(diff1 + diff2) < Math.abs(diff1) + Math.abs(diff2));
0955 //we have [antecedentA...pronoun...AntecedentB] ==> choose A
0956 return ant1;
0957 }
0958 else if (diff2 > 0){
0959 Assert.assertTrue(Math.abs(diff1 + diff2) < Math.abs(diff1) + Math.abs(diff2));
0960 //we have [antecedentA...pronoun...AntecedentB] ==> choose A
0961 return ant2;
0962 }
0963 else {
0964 //both possible antecedents are BEHIND the anaophoric pronoun - i.e. we have either
0965 //cataphora, or nominal antecedent, or an antecedent that is further back in scope
0966 //in any case - discard the antecedents
0967 return null;
0968 }
0969 }
0970
0971
0972 /** --- */
0973 private class Quote {
0974
0975 /** --- */
0976 public static final int ANTEC_AFTER = 1;
0977 /** --- */
0978 public static final int ANTEC_BEFORE = 2;
0979 /** --- */
0980 public static final int ANTEC_BACK = 3;
0981 /** --- */
0982 private Set<Annotation> antecedentsBefore;
0983 /** --- */
0984 private Set<Annotation> antecedentsAfter;
0985 /** --- */
0986 private Set<Annotation> antecedentsBackInContext;
0987 /** --- */
0988 private Annotation quoteAnnotation;
0989 /** --- */
0990 private int quoteIndex;
0991
0992 /** --- */
0993 public Quote(Annotation quoteAnnotation, int index) {
0994
0995 this.quoteAnnotation = quoteAnnotation;
0996 this.quoteIndex = index;
0997 init();
0998 }
0999
1000 /** --- */
1001 private void init() {
1002
1003 //0.preconditions
1004 Assert.assertNotNull(textSentences);
1005
1006 //0.5 create a restriction for PRP pos tokens
1007 FeatureMap prpTokenRestriction = new SimpleFeatureMapImpl();
1008 prpTokenRestriction.put(TOKEN_CATEGORY_FEATURE_NAME,PRP_CATEGORY);
1009
1010 //1. generate the precPersons set
1011
1012 //1.1 locate the sentece containing the opening quote marks
1013 int quoteStartPos = java.util.Arrays.binarySearch(textSentences,
1014 this.quoteAnnotation.getStartNode(),
1015 ANNOTATION_OFFSET_COMPARATOR);
1016
1017 //normalize index
1018 int startSentenceIndex = quoteStartPos >= 0 ? quoteStartPos
1019 : -quoteStartPos -1 -1; // blame Sun, not me
1020 //still not good?
1021 if (startSentenceIndex < 0) {
1022 startSentenceIndex = 0;
1023 }
1024
1025 //1.2. get the persons and restrict to these that precede the quote (i.e. not contained
1026 //in the quote)
1027 this.antecedentsBefore = generateAntecedentCandidates(startSentenceIndex,
1028 this.quoteIndex,
1029 ANTEC_BEFORE);
1030
1031
1032 //2. generate the precPersonsInCOntext set
1033 //2.1. get the persons from the sentence precedeing the sentence containing the quote start
1034 if (startSentenceIndex > 0) {
1035 this.antecedentsBackInContext = generateAntecedentCandidates(startSentenceIndex -1,
1036 this.quoteIndex,
1037 ANTEC_BACK);
1038 }
1039
1040 //2. generate the succ Persons set
1041 //2.1 locate the sentece containing the closing quote marks
1042 int quoteEndPos = java.util.Arrays.binarySearch(textSentences,
1043 this.quoteAnnotation.getEndNode(),
1044 ANNOTATION_OFFSET_COMPARATOR);
1045
1046 //normalize it
1047 int endSentenceIndex = quoteEndPos >= 0 ? quoteEndPos
1048 : -quoteEndPos -1 -1; // blame Sun, not me
1049 //still not good?
1050 if (endSentenceIndex < 0) {
1051 endSentenceIndex = 0;
1052 }
1053
1054 this.antecedentsAfter = generateAntecedentCandidates(endSentenceIndex,
1055 this.quoteIndex,
1056 ANTEC_AFTER);
1057 //generate t
1058 }
1059
1060
1061 /** --- */
1062 private Set<Annotation> generateAntecedentCandidates(int sentenceNumber,
1063 int quoteNumber ,
1064 int mode) {
1065
1066 //0. preconditions
1067 Assert.assertTrue(sentenceNumber >=0);
1068 Assert.assertTrue(quoteNumber >=0);
1069 Assert.assertTrue(mode == Quote.ANTEC_AFTER ||
1070 mode == Quote.ANTEC_BEFORE ||
1071 mode == Quote.ANTEC_BACK);
1072
1073 //1. get sentence
1074 Sentence sentence = textSentences[sentenceNumber];
1075
1076 //2. get the persons
1077 Set<Annotation> antecedents = new HashSet<Annotation>(sentence.getPersons());
1078
1079 //4. now get the he/she pronouns in the relevant context
1080 AnnotationSet annotations = null;
1081
1082 switch(mode) {
1083
1084 case ANTEC_BEFORE:
1085 annotations = defaultAnnotations.getContained(sentence.getStartOffset(),
1086 this.getStartOffset());
1087 break;
1088
1089 case ANTEC_AFTER:
1090 annotations = defaultAnnotations.getContained(this.getEndOffset(),
1091 sentence.getEndOffset());
1092 break;
1093
1094 case ANTEC_BACK:
1095 annotations = defaultAnnotations.getContained(sentence.getStartOffset(),
1096 sentence.getEndOffset());
1097 break;
1098 }
1099
1100 //4. get the pronouns
1101 //restrict to he/she pronouns
1102 if (null != annotations) {
1103 AnnotationSet pronouns = annotations.get(TOKEN_ANNOTATION_TYPE,PRP_RESTRICTION);
1104
1105 if (null != pronouns) {
1106
1107 Iterator it = pronouns.iterator();
1108 while (it.hasNext()) {
1109 Annotation currPronoun = (Annotation)it.next();
1110 //add to succPersons only if HE/SHE
1111 String pronounString = (String)currPronoun.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
1112
1113 if (null != pronounString &&
1114 (pronounString.equalsIgnoreCase("he") || pronounString.equalsIgnoreCase("she"))
1115 )
1116 antecedents.add(currPronoun);
1117 }//while
1118 }//if
1119 }//if
1120
1121
1122 //3. depending on the mode, may have to restrict persons to these that precede/succeed
1123 //the quoted fragment
1124 //
1125 //for ANTEC_BEFORE, get the ones #preceding# the quote, contained in the sentence where
1126 //the quote *starts*
1127 //
1128 //for ANTEC_AFTER, get the ones #succeeding# the quote, contained in the sentence where
1129 //the quote *ends*
1130 //
1131 //for ANTEC_BACK, we are operating in the context of the sentence previous to the
1132 //sentence where the quote starts. I.e. we're resolbinf a case like
1133 // [sss "q1q1q1q1" s1s1s1s1]["q2q2q2q2"]
1134 //...and we want to get the entities from the s1s1 part - they *succeed* the #previous# quote
1135 //Note that the cirrent sentence is the first one, not the second
1136 //
1137 Iterator itPersons = antecedents.iterator();
1138
1139 while (itPersons.hasNext()) {
1140 Annotation currPerson = (Annotation)itPersons.next();
1141
1142 //cut
1143 if (Quote.ANTEC_BEFORE == mode &&
1144 currPerson.getStartNode().getOffset().intValue() > getStartOffset().intValue()) {
1145 //restrict only to persosn preceding
1146 itPersons.remove();
1147 }
1148 else if (Quote.ANTEC_AFTER == mode &&
1149 currPerson.getStartNode().getOffset().intValue() < getEndOffset().intValue()) {
1150 //restrict only to persons succeeding the quote
1151 itPersons.remove();
1152 }
1153 else if (Quote.ANTEC_BACK == mode) {
1154 //this one is tricky
1155 //locate the quote previous to the one we're resolving
1156 //(since we're operating in the sentence previous to the quote being resolved
1157 //wew try to find if any quote (prevQuote) exist in this sentence and get the
1158 //persons succeeding it)
1159
1160 //get prev quote
1161 //is the curr quote the first one?
1162 if (quoteNumber >0) {
1163 Quote prevQuote = PronominalCoref.this.quotedText[quoteNumber-1];
1164
1165 //restrict to the succeeding persons
1166 if (currPerson.getStartNode().getOffset().longValue() < prevQuote.getEndOffset().longValue()) {
1167 itPersons.remove();
1168 }
1169 }
1170 }
1171 }
1172
1173 return antecedents;
1174 }
1175
1176 /** --- */
1177 public Long getStartOffset() {
1178 return this.quoteAnnotation.getStartNode().getOffset();
1179 }
1180
1181 /** --- */
1182 public Long getEndOffset() {
1183 return this.quoteAnnotation.getEndNode().getOffset();
1184 }
1185
1186 /** --- */
1187 public Set<Annotation> getAntecedentCandidates(int type) {
1188
1189 switch(type) {
1190
1191 case ANTEC_AFTER:
1192 return null != this.antecedentsAfter ?
1193 this.antecedentsAfter :
1194 new HashSet<Annotation>();
1195
1196 case ANTEC_BEFORE:
1197 return null != this.antecedentsBefore ?
1198 this.antecedentsBefore :
1199 new HashSet<Annotation>();
1200
1201 case ANTEC_BACK:
1202 return null != this.antecedentsBackInContext ?
1203 this.antecedentsBackInContext :
1204 new HashSet<Annotation>();
1205
1206 default:
1207 throw new IllegalArgumentException();
1208 }
1209 }
1210
1211 }
1212
1213
1214 /** --- */
1215 private class Sentence {
1216
1217 /** --- */
1218 private int sentNumber;
1219 /** --- */
1220 private int paraNumber;
1221 /** --- */
1222 private Long startOffset;
1223 /** --- */
1224 private Long endOffset;
1225 /** --- */
1226 private AnnotationSet persons;
1227 /** --- */
1228 private AnnotationSet inanimated;
1229
1230 /** --- */
1231 public Sentence(int sentNumber,
1232 int paraNumber,
1233 Long startOffset,
1234 Long endOffset,
1235 AnnotationSet persons,
1236 AnnotationSet inanimated) {
1237
1238 this.sentNumber = sentNumber;
1239 this.paraNumber = paraNumber;
1240 this.startOffset = startOffset;
1241 this.endOffset = endOffset;
1242 this.persons = persons;
1243 this.inanimated = inanimated;
1244 }
1245
1246 /** --- */
1247 public Long getStartOffset() {
1248 return this.startOffset;
1249 }
1250
1251 /** --- */
1252 public Long getEndOffset() {
1253 return this.endOffset;
1254 }
1255
1256 /** --- */
1257 public AnnotationSet getPersons() {
1258 return this.persons;
1259 }
1260
1261 public AnnotationSet getInanimated() {
1262 return this.inanimated;
1263 }
1264
1265 }
1266
1267
1268 public String getInanimatedEntityTypes() {
1269 return inanimatedEntityTypes;
1270 }
1271
1272 public void setInanimatedEntityTypes(String inanimatedEntityTypes) {
1273 this.inanimatedEntityTypes = inanimatedEntityTypes;
1274 }
1275
1276 /* (non-Javadoc)
1277 * @see gate.util.Benchmarkable#getBenchmarkId()
1278 */
1279 public String getBenchmarkId() {
1280 if(benchmarkId == null) {
1281 return getName();
1282 }
1283 else {
1284 return benchmarkId;
1285 }
1286 }
1287
1288 /* (non-Javadoc)
1289 * @see gate.util.Benchmarkable#setBenchmarkId(java.lang.String)
1290 */
1291 public void setBenchmarkId(String benchmarkId) {
1292 this.benchmarkId = benchmarkId;
1293 }
1294
1295 }
|