001 /*
002 * Copyright (c) 1995-2010, The University of Sheffield. See the file
003 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
004 *
005 * This file is part of GATE (see http://gate.ac.uk/), and is free
006 * software, licenced under the GNU Library General Public License,
007 * Version 2, June 1991 (in the distribution as file licence.html,
008 * and also available at http://gate.ac.uk/gate/licence.html).
009 *
010 * $Id: GazetteerNode.java 12655 2010-05-18 12:18:39Z thomas_heitz $
011 */
012
013 package gate.creole.gazetteer;
014
015 import gate.util.GateRuntimeException;
016
017 import java.util.*;
018
019 /**
020 * <p>
021 * A node in a gazetteer list allowing an arbitary amount of features
022 * to be added as metadata to an entry, e.g.:
023 * </p>
024 * <p>
025 * With the separator set to '\t', if a gazetteer entry looked like this:
026 * </p>
027 * <pre>Vodaphone	type=mobile phone company</pre>
028 * <p>
029 * Then the GazetteerNode would consist of an entry "Vodaphone", with a featureMap
030 * containing the key "type", mapped to "mobile phone company".
031 * </p>
032 * @author JLy
033 *
034 */
035 public class GazetteerNode {
036 /** The gazetteer entry */
037 private String entry;
038
039 /** The features associated to the entry. If there are no features for this entry, it is null */
040 private Map featureMap = null;
041
042 /** The separator used in a GazetteerNode string */
043 private String separator;
044
045 /**
046 * Constructor. Uses the default separator.
047 *
048 * @param entry the gazetteer entry
049 * @param featureMap a map of name-value pairs
050 */
051 public GazetteerNode(String entry, Map featureMap) {
052 this.entry = entry;
053 this.featureMap = featureMap;
054 }
055
056 /**
057 * Parses and create a gazetteer node from a string using no separator, i.e.
058 * the whole node is considered as the string to match, and there are no
059 * additional features.
060 *
061 * @param node the gazetteer node to be parsed
062 */
063 public GazetteerNode(String node) {
064 this(node, (String) null, false);
065 }
066
067 /**
068 * Parses and create a gazetteer node from a string
069 *
070 * @param node the gazetteer node to be parsed
071 * @param separator the separator used in the gazetteer node string to delimit
072 * each name-value pair of features. If the separator is null, then the whole
073 * node will be used as the gazetteer entry
074 */
075 public GazetteerNode(String node, String separator) {
076 this(node, separator, false);
077 }
078
079 /**
080 * Parses and create a gazetteer node from a string
081 *
082 * @param node the gazetteer node to be parsed
083 * @param separator the separator used in the gazetteer node string to delimit
084 * each name-value pair of features. If the separator is null, then the whole
085 * node will be used as the gazetteer entry
086 * @param isOrdered true if the feature maps used should be ordered
087 */
088 public GazetteerNode(String node, String separator, boolean isOrdered) {
089 this.separator = (separator != null && separator.length() == 0)? null : separator;
090 int index_sep;
091 if(this.separator == null || (index_sep = node.indexOf(this.separator)) == -1 ) {
092 entry = node;
093 // leave featureMap null
094 } else {
095 entry = node.substring(0, index_sep);
096 String features = node.substring(index_sep + 1);
097 featureMap = getFeatures(features, isOrdered);
098 }
099 }
100
101 /**
102 * Given a string of name-value pairs in the format "name=value", separated
103 * by whatever this GazetteerNode's separator has been set to, convert it
104 * to the equivalent map.
105 *
106 * @param features a string in the format "name=value" separated by whatever
107 * the separator has been set to.
108 * @param isOrdered true if the map returned should be ordered
109 * @return a Map of the features
110 */
111 private Map getFeatures(String features, boolean isOrdered) {
112
113 if (separator == null)
114 return null;
115
116 // split the string into name-value pair strings
117 ArrayList<String> tempPairs = new ArrayList<String>();
118
119 int substr_begin = 0;
120 int substr_end = features.indexOf(separator,substr_begin);
121 while (substr_end != -1) {
122 tempPairs.add(features.substring(substr_begin,substr_end));
123 substr_begin = substr_end + 1;
124 substr_end = features.indexOf(separator,substr_begin);
125 }
126
127 String lastPair = features.substring(substr_begin);
128
129 if (lastPair.length() != 0) {
130 tempPairs.add(lastPair);
131 }
132
133 String[] pairs = tempPairs.toArray(new String[tempPairs.size()]);
134
135 if (pairs.length == 0) {
136 return null;
137 }
138
139 // extract the name and value from the pair strings and put in feature map
140 Map<String,String> featureMap;
141 if (isOrdered) {
142 featureMap = new LinkedHashMap<String,String>(pairs.length);
143 } else {
144 featureMap = new HashMap<String,String>(pairs.length);
145 }
146 for(int i = 0; i < pairs.length; i++) {
147 String pair = pairs[i];
148 int sep = pair.indexOf('=');
149 if(sep == -1) {
150 throw new GateRuntimeException("Correct format for gazetteer entry" +
151 " features is: [entry]([separator][featureName]=[featureValue])*");
152 } else {
153 String name = pair.substring(0, sep).trim();
154 String value = pair.substring(sep + 1).trim();
155 if(name.length() > 0 && value.length() > 0) {
156 featureMap.put(name, value);
157 }
158 }
159 }
160
161 if (featureMap.size() == 0) {
162 return null;
163 }
164 return featureMap;
165 }
166
167 /**
168 * Converts a featureMap to separated name value pairs. Note: the string will
169 * begin with the separator character.
170 *
171 * @param featureMap map to be converted
172 * @return string of name/value pairs
173 */
174 public String featureMapToString(Map featureMap) {
175 String str = "";
176 if (featureMap instanceof LinkedHashMap) {
177 for (Object key : featureMap.keySet()) {
178 str += separator + key + "=" + featureMap.get(key);
179 }
180 } else {
181 // sort into a predictable order
182 List sortedKeys = new ArrayList(featureMap.keySet());
183 Collections.sort(sortedKeys);
184 for(Iterator it = sortedKeys.iterator(); it.hasNext();) {
185 String key = (String)it.next();
186 str += separator + key + "=" + featureMap.get(key);
187 }
188 }
189 return str;
190 }
191
192
193
194 /**
195 * Gets the string representation of this node
196 *
197 * @return the string representation of this node
198 */
199 public String toString() {
200 if(featureMap == null || separator == null)
201 return entry;
202 else return entry + featureMapToString(featureMap);
203 }
204
205 /**
206 * Checks this node vs another one for equality.
207 *
208 * @param o another node
209 * @return true if the string representation of the entry and weighting match.
210 */
211 public boolean equals(Object o) {
212 boolean result = false;
213 if(o instanceof GazetteerNode) {
214 result = this.toString().equals(o.toString());
215 }
216 return result;
217 }
218
219 /**
220 * @return the entry
221 */
222 public String getEntry() {
223 return entry;
224 }
225
226 /**
227 * @param entry
228 * the entry to set
229 */
230 public void setEntry(String entry) {
231 this.entry = entry;
232 }
233
234 /**
235 * @return the featureMap
236 */
237 public Map getFeatureMap() {
238 return featureMap;
239 }
240
241 /**
242 * @param featureMap the featureMap to set
243 */
244 public void setFeatureMap(Map featureMap) {
245 this.featureMap = featureMap;
246 }
247
248 /**
249 * @return the separator
250 */
251 public String getSeparator() {
252 return separator;
253 }
254
255 /**
256 * @param separator the separator to set
257 */
258 public void setSeparator(String separator) {
259 this.separator = separator;
260 }
261
262
263 }
|