001 /*
002 * AbstractCoreferencer.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * $Id: AbstractCoreferencer.java 12006 2009-12-01 17:24:28Z thomas_heitz $
013 */
014
015 package gate.creole.coref;
016
017 import java.util.*;
018
019 import gate.*;
020 import gate.creole.AbstractLanguageAnalyser;
021 import gate.creole.ResourceInstantiationException;
022 import gate.util.GateRuntimeException;
023 import gate.util.SimpleFeatureMapImpl;
024
025 public abstract class AbstractCoreferencer extends AbstractLanguageAnalyser
026 implements ProcessingResource{
027
028 public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
029
030 public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";
031
032 public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE";
033 public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset";
034
035 /** --- */
036 private static final boolean DEBUG = false;
037
038 public String coreferenceType;
039
040 /** --- */
041 public AbstractCoreferencer(String type) {
042 this.coreferenceType = type;
043 }
044
045
046 /** Initialise this resource, and return it. */
047 public Resource init() throws ResourceInstantiationException {
048
049 Resource result = super.init();
050
051 return result;
052 } // init()
053
054
055 /**
056 * Reinitialises the processing resource. After calling this method the
057 * resource should be in the state it is after calling init.
058 * If the resource depends on external resources (such as rules files) then
059 * the resource will re-read those resources. If the data used to create
060 * the resource has changed since the resource has been created then the
061 * resource will change too after calling reInit().
062 */
063 public void reInit() throws ResourceInstantiationException {
064 init();
065 } // reInit()
066
067 /** Set the document to run on. */
068 public void setDocument(Document newDocument) {
069 super.setDocument(newDocument);
070 }
071
072 /** --- */
073 public abstract void setAnnotationSetName(String annotationSetName);
074
075 /** --- */
076 public abstract String getAnnotationSetName();
077
078 /** --- */
079 protected void generateCorefChains(HashMap ana2ant)
080 throws GateRuntimeException{
081
082 String asName = getAnnotationSetName();
083 AnnotationSet outputSet = null;
084
085 if (null == asName || asName.equals("")) {
086 outputSet = getDocument().getAnnotations();
087 }
088 else {
089 outputSet = getDocument().getAnnotations(asName);
090 }
091
092 //3. generate new annotations
093 Iterator it = ana2ant.entrySet().iterator();
094 while (it.hasNext()) {
095 Map.Entry currLink = (Map.Entry)it.next();
096 Annotation anaphor = (Annotation)currLink.getKey();
097 Annotation antecedent = (Annotation)currLink.getValue();
098
099 if (DEBUG) {
100 AnnotationSet corefSet = getDocument().getAnnotations("COREF");
101 Long antOffset = new Long(0);
102
103 if (null != antecedent) {
104 antOffset = antecedent.getStartNode().getOffset();
105 }
106
107 FeatureMap features = new SimpleFeatureMapImpl();
108 features.put("antecedent",antOffset);
109 corefSet.add(anaphor.getStartNode(),anaphor.getEndNode(),"COREF",features);
110 }
111
112 //do we have antecedent?
113 if (null == antecedent) {
114 continue;
115 }
116
117 //get the ortho-matches of the antecedent
118 List matches = (List)antecedent.getFeatures().
119 get(ANNOTATION_COREF_FEATURE_NAME);
120 if (null == matches) {
121 matches = new ArrayList();
122 matches.add(antecedent.getId());
123 antecedent.getFeatures().
124 put(ANNOTATION_COREF_FEATURE_NAME,matches);
125 //check if the document has a list of matches
126 //if yes, simply add the new list to it
127 //if not, create it and add the list of matches to it
128 if (document.getFeatures().containsKey(
129 DOCUMENT_COREF_FEATURE_NAME)) {
130 Map matchesMap = (Map) document.getFeatures().get(
131 DOCUMENT_COREF_FEATURE_NAME);
132 List matchesList = (List) matchesMap.get(getAnnotationSetName());
133 if (matchesList == null) {
134 matchesList = new ArrayList();
135 matchesMap.put(getAnnotationSetName(), matchesList);
136 }
137 matchesList.add(matches);
138 } else {
139 Map matchesMap = new HashMap();
140 List matchesList = new ArrayList();
141 matchesMap.put(getAnnotationSetName(), matchesList);
142 matchesList.add(matches);
143 }//if else
144 }//if matches == null
145
146 FeatureMap features = new SimpleFeatureMapImpl();
147 features.put(COREF_TYPE_FEATURE_NAME, coreferenceType);
148 features.put(ANNOTATION_COREF_FEATURE_NAME, matches);
149 features.put(COREF_ANTECEDENT_FEATURE_NAME,
150 antecedent.getStartNode().getOffset());
151
152 Integer annID = outputSet.add(anaphor.getStartNode(),
153 anaphor.getEndNode(),
154 antecedent.getType(),
155 features);
156 matches.add(annID);
157 }
158 }
159
160 }
|