001 /*
002 * AbstractGazetteer.java
003 *
004 * Copyright (c) 2002, The University of Sheffield.
005 *
006 * This file is part of GATE (see http://gate.ac.uk/), and is free
007 * software, licenced under the GNU Library General Public License,
008 * Version 2, June1991.
009 *
010 * A copy of this licence is included in the distribution in the file
011 * licence.html, and is also available at http://gate.ac.uk/gate/licence.html.
012 *
013 * borislav popov 02/2002
014 *
015 */
016 package gate.creole.gazetteer;
017
018 import java.util.*;
019
020 import gate.FeatureMap;
021 import gate.creole.ResourceInstantiationException;
022
023 /**AbstractGazetteer
024 * This class implements the common-for-all methods of the Gazetteer interface*/
025 public abstract class AbstractGazetteer
026 extends gate.creole.AbstractLanguageAnalyser implements Gazetteer {
027
028 /** the set of gazetteer listeners */
029 protected Set listeners = new HashSet();
030
031 /** Used to store the annotation set currently being used for the newly
032 * generated annotations*/
033 protected String annotationSetName;
034
035 /** A map of the features */
036 protected FeatureMap features = null;
037
038 /** the encoding of the gazetteer */
039 protected String encoding = "UTF-8";
040
041 /**
042 * The value of this property is the URL that will be used for reading the
043 * lists that define this Gazetteer
044 */
045 protected java.net.URL listsURL;
046
047 /**
048 * Should this gazetteer be case sensitive. The default value is true.
049 */
050 protected Boolean caseSensitive = new Boolean(true);
051
052 /**
053 * Should this gazetteer only match whole words. The default value is
054 * <tt>true</tt>.
055 */
056 protected Boolean wholeWordsOnly = new Boolean(true);
057
058 /**
059 * Should this gazetteer only match the longest string starting from any
060 * offset? This parameter is only relevant when the list of lookups contains
061 * proper prefixes of other entries (e.g when both "Dell" and
062 * "Dell Europe" are in the lists). The default behaviour (when this
063 * parameter is set to <tt>true</tt>) is to only match the longest entry,
064 * "Dell Europe" in this example. This is the default GATE gazetteer
065 * behaviour since version 2.0. Setting this parameter to <tt>false</tt> will
066 * cause the gazetteer to match all possible prefixes.
067 */
068 protected Boolean longestMatchOnly = new Boolean(true);
069
070 /** the linear definition of the gazetteer */
071 protected LinearDefinition definition;
072
073 /** reference to mapping definition info
074 * allows filling of Lookup.ontologyClass according to a list*/
075 protected MappingDefinition mappingDefinition;
076
077
078 /**
079 * Sets the AnnotationSet that will be used at the next run for the newly
080 * produced annotations.
081 */
082 public void setAnnotationSetName(String newAnnotationSetName) {
083 annotationSetName = newAnnotationSetName;
084 }
085
086 /**
087 * Gets the AnnotationSet that will be used at the next run for the newly
088 * produced annotations.
089 */
090 public String getAnnotationSetName() {
091 return annotationSetName;
092 }
093
094 public void setEncoding(String newEncoding) {
095 encoding = newEncoding;
096 }
097
098 public String getEncoding() {
099 return encoding;
100 }
101
102 public java.net.URL getListsURL() {
103 return listsURL;
104 }
105
106 public void setListsURL(java.net.URL newListsURL) {
107 listsURL = newListsURL;
108 }
109
110 public void setCaseSensitive(Boolean newCaseSensitive) {
111 caseSensitive = newCaseSensitive;
112 }
113
114 public Boolean getCaseSensitive() {
115 return caseSensitive;
116 }
117
118 public void setMappingDefinition(MappingDefinition mapping) {
119 mappingDefinition = mapping;
120 }
121
122 public MappingDefinition getMappingDefinition(){
123 return mappingDefinition;
124 }
125
126 /**
127 * @return the longestMatchOnly
128 */
129 public Boolean getLongestMatchOnly() {
130 return longestMatchOnly;
131 }
132
133 /**
134 * @param longestMatchOnly the longestMatchOnly to set
135 */
136 public void setLongestMatchOnly(Boolean longestMatchOnly) {
137 this.longestMatchOnly = longestMatchOnly;
138 }
139
140 /**Gets the linear definition of this gazetteer. there is no parallel
141 * set method because the definition is loaded through the listsUrl
142 * on init().
143 * @return the linear definition of the gazetteer */
144 public LinearDefinition getLinearDefinition() {
145 return definition;
146 }
147
148 /** */
149 public FeatureMap getFeatures(){
150 return features;
151 } // getFeatures
152
153 /** */
154 public void setFeatures(FeatureMap features){
155 this.features = features;
156 } // setFeatures
157
158 public void reInit() throws ResourceInstantiationException {
159 super.reInit();
160 fireGazetteerEvent(new GazetteerEvent(this,GazetteerEvent.REINIT));
161 }//reInit()
162
163 /**
164 * fires a Gazetteer Event
165 * @param ge Gazetteer Event to be fired
166 */
167 public void fireGazetteerEvent(GazetteerEvent ge) {
168 Iterator li = listeners.iterator();
169 while ( li.hasNext()) {
170 GazetteerListener gl = (GazetteerListener) li.next();
171 gl.processGazetteerEvent(ge);
172 }
173 }
174
175 /**
176 * Registers a Gazetteer Listener
177 * @param gl Gazetteer Listener to be registered
178 */
179 public void addGazetteerListener(GazetteerListener gl){
180 if ( null!=gl )
181 listeners.add(gl);
182 }
183
184 /**
185 * Gets the value for the {@link #wholeWordsOnly} parameter.
186 * @return a Boolean value.
187 */
188 public Boolean getWholeWordsOnly() {
189 return wholeWordsOnly;
190 }
191
192 /**
193 * Sets the value for the {@link #wholeWordsOnly} parameter.
194 * @param wholeWordsOnly a Boolean value.
195 */
196 public void setWholeWordsOnly(Boolean wholeWordsOnly) {
197 this.wholeWordsOnly = wholeWordsOnly;
198 }
199
200 }//class AbstractGazetteer
|