001 /*
002 * Coreferencer.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Marin Dimitrov, 18/Dec/2001
013 *
014 * $Id: Coreferencer.java 12006 2009-12-01 17:24:28Z thomas_heitz $
015 */
016
017 package gate.creole.coref;
018
019 import java.util.*;
020
021 import gate.*;
022 import gate.creole.*;
023 import gate.util.GateRuntimeException;
024 import gate.util.SimpleFeatureMapImpl;
025
026 public class Coreferencer extends AbstractLanguageAnalyser implements
027 ProcessingResource {
028
029 public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
030
031 public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";
032
033 public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE";
034
035 public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset";
036
037 /** --- */
038 private static final boolean DEBUG = false;
039
040 /** --- */
041 private PronominalCoref pronominalModule;
042
043 /** --- */
044 public Coreferencer() {
045 this.pronominalModule = new PronominalCoref();
046 }
047
048 /** Initialise this resource, and return it. */
049 public Resource init() throws ResourceInstantiationException {
050
051 Resource result = super.init();
052
053 // load all submodules
054 this.pronominalModule.init();
055
056 return result;
057 } // init()
058
059 /**
060 * Reinitialises the processing resource. After calling this method
061 * the resource should be in the state it is after calling init. If
062 * the resource depends on external resources (such as rules files)
063 * then the resource will re-read those resources. If the data used to
064 * create the resource has changed since the resource has been created
065 * then the resource will change too after calling reInit().
066 */
067 public void reInit() throws ResourceInstantiationException {
068 init();
069 } // reInit()
070
071 /** Set the document to run on. */
072 public void setDocument(Document newDocument) {
073
074 // Assert.assertNotNull(newDocument);
075
076 this.pronominalModule.setDocument(newDocument);
077 super.setDocument(newDocument);
078 }
079
080 /** --- */
081 public void setAnnotationSetName(String annotationSetName) {
082 this.pronominalModule.setAnnotationSetName(annotationSetName);
083 }
084
085 /** --- */
086 public String getAnnotationSetName() {
087 return this.pronominalModule.getAnnotationSetName();
088 }
089
090 /** --- */
091 public void setResolveIt(Boolean newValue) {
092 this.pronominalModule.setResolveIt(newValue);
093 }
094
095 /** --- */
096 public Boolean getResolveIt() {
097 return this.pronominalModule.getResolveIt();
098 }
099
100 /**
101 * This method runs the coreferencer. It assumes that all the needed
102 * parameters are set. If they are not, an exception will be fired.
103 */
104 public void execute() throws ExecutionException {
105
106 fireStatusChanged("Pronominal Coreferencer processing: "
107 + document.getName());
108 this.pronominalModule.execute();
109 generateCorefChains();
110 fireStatusChanged("Pronominal Coreferencer completed");
111 }
112
113 /** --- */
114 private void generateCorefChains() throws GateRuntimeException {
115
116 // 1. get the resolved corefs
117 HashMap ana2ant = this.pronominalModule.getResolvedAnaphora();
118
119 // 2. get the outout annotation set
120 String asName = getAnnotationSetName();
121 AnnotationSet outputSet = null;
122
123 if(null == asName || asName.equals("")) {
124 outputSet = getDocument().getAnnotations();
125 }
126 else {
127 outputSet = getDocument().getAnnotations(asName);
128 }
129
130 // 3. generate new annotations
131 Iterator it = ana2ant.entrySet().iterator();
132 while(it.hasNext()) {
133 Map.Entry currLink = (Map.Entry)it.next();
134 Annotation anaphor = (Annotation)currLink.getKey();
135 Annotation antecedent = (Annotation)currLink.getValue();
136
137 if(DEBUG) {
138 AnnotationSet corefSet = getDocument().getAnnotations("COREF");
139 Long antOffset = new Long(0);
140
141 if(null != antecedent) {
142 antOffset = antecedent.getStartNode().getOffset();
143 }
144
145 FeatureMap features = new SimpleFeatureMapImpl();
146 features.put("antecedent", antOffset);
147 corefSet.add(anaphor.getStartNode(), anaphor.getEndNode(), "COREF",
148 features);
149 }
150
151 // do we have antecedent?
152 if(null == antecedent) {
153 continue;
154 }
155
156 // get the ortho-matches of the antecedent
157 List matches = (List)antecedent.getFeatures().get(
158 ANNOTATION_COREF_FEATURE_NAME);
159 if(null == matches) {
160 matches = new ArrayList();
161 matches.add(antecedent.getId());
162 antecedent.getFeatures().put(ANNOTATION_COREF_FEATURE_NAME, matches);
163 // check if the document has a list of matches
164 // if yes, simply add the new list to it
165 // if not, create it and add the list of matches to it
166 if(document.getFeatures().containsKey(DOCUMENT_COREF_FEATURE_NAME)) {
167 Map matchesMap = (Map)document.getFeatures().get(
168 DOCUMENT_COREF_FEATURE_NAME);
169 List matchesList = (List)matchesMap.get(getAnnotationSetName());
170 if(matchesList == null) {
171 matchesList = new ArrayList();
172 matchesMap.put(getAnnotationSetName(), matchesList);
173 }
174 matchesList.add(matches);
175 }
176 else {
177 Map matchesMap = new HashMap();
178 List matchesList = new ArrayList();
179 matchesMap.put(getAnnotationSetName(), matchesList);
180 matchesList.add(matches);
181 }// if else
182 }// if matches == null
183
184 FeatureMap features = new SimpleFeatureMapImpl();
185 features.put(COREF_TYPE_FEATURE_NAME, "PRONOUN");
186 features.put(ANNOTATION_COREF_FEATURE_NAME, matches);
187 features.put(COREF_ANTECEDENT_FEATURE_NAME, antecedent.getStartNode()
188 .getOffset());
189
190 //see if the annotation we want to add already exists
191 AnnotationSet existing = outputSet.get(antecedent.getType(), anaphor
192 .getStartNode().getOffset(), anaphor.getEndNode().getOffset());
193
194 if(existing.size() > 0) {
195 //if it exists simply update the existing annotation
196 Annotation annot = existing.iterator().next();
197 annot.getFeatures().putAll(features);
198 matches.add(annot.getId());
199 }
200 else {
201 //if it doesn't exist create a new annotation
202 matches.add(outputSet.add(anaphor.getStartNode(), anaphor.getEndNode(),
203 antecedent.getType(), features));
204 }
205 }
206 }
207
208 public String getInanimatedEntityTypes() {
209 return this.pronominalModule.getInanimatedEntityTypes();
210 }
211
212 public void setInanimatedEntityTypes(String inanimatedEntityTypes) {
213 this.pronominalModule.setInanimatedEntityTypes(inanimatedEntityTypes);
214 }
215
216 }
|