001 package com.ontotext.gate.gazetteer;
002 /*
 * TestHashGazetteer.java
004 *
005 * OntoText Lab.
006 *
007 * borislav popov , 09/11/2001
008 *
009 * $Id: TestHashGazetteer.java 8839 2007-06-28 11:24:11Z valyt $
010 */
011
012 import java.util.*;
013 import java.io.*;
014 import java.net.*;
015 import java.beans.*;
016 import java.lang.reflect.*;
017 import junit.framework.*;
018
019 import gate.*;
020 import gate.util.*;
021 import gate.creole.*;
022 import gate.corpora.TestDocument;
023
024 /**
025 * Tests the HashGazetteer.
026 */
027 public class TestHashGazetteer extends TestCase {
028
029 private static final String GAZ_AS = "GazetteerAS";
030 public TestHashGazetteer(String name) {
031 super(name);
032 }
033
034 /** Fixture set up */
035 public void setUp() throws Exception {
036 }
037
038 public void tearDown() throws Exception {
039 } // tearDown
040
041 /** Test the default tokeniser */
042 public void testHashGazetteer() throws Exception {
043 //get a document
044 Document doc = Factory.newDocument(
045 new URL(TestDocument.getTestServerName() + "tests/doc0.html")
046 );
047
048 //create a default gazetteer
049 FeatureMap params = Factory.newFeatureMap();
050 HashGazetteer gaz = (HashGazetteer) Factory.createResource(
051 "com.ontotext.gate.gazetteer.HashGazetteer", params);
052
053 //runtime stuff
054 gaz.setDocument(doc);
055 gaz.setAnnotationSetName(GAZ_AS);
056 gaz.execute();
057
058 // dumpAnnotationSet(doc.getAnnotations(Gaz_AS));
059
060 assertTrue("the Annotation set resulting of the execution of the OntoText "
061 +"Natural Gazetteer is empty."
062 ,!doc.getAnnotations(GAZ_AS).isEmpty());
063 //check whether the annotations are as expected
064
065
066 // assertTrue("Found in "+ doc.getSourceUrl().getFile()+ " "+
067 // doc.getAnnotations(GAZ_AS).size() +
068 // " Lookup annotations, instead of the expected 53.",
069 // doc.getAnnotations(GAZ_AS).size()== 53);
070
071 /*very complex compare */
072 // assertTrue("the Annotation set resulting from the OntoText Natural Gazetteer "
073 // +"is not exactly the same as expected. Possible reasons: change in the test file "
074 // +"doc0.html or malfunctioning of the gazetteer"
075 // ,EqualAnnotationSets(doc.getAnnotations(GAZ_AS)));
076
077 } // testHashGazetteer();
078
079 /** Test suite routine for the test runner */
080 public static Test suite() {
081 return new TestSuite(TestHashGazetteer.class);
082 } // suite
083
084 public static void main(String[] args) {
085 try{
086 Gate.init();
087 TestHashGazetteer testGaz = new TestHashGazetteer("");
088 testGaz.setUp();
089 testGaz.testHashGazetteer();
090 testGaz.tearDown();
091 } catch(Exception e) {
092 e.printStackTrace();
093 }
094 } // main
095
096
097 /** dumps the annotation set to system ouput
098 * @param marks an annotation set
099 */
100 private void dumpAnnotationSet(AnnotationSet marks) {
101 if (marks != null) {
102 Iterator<Annotation> iter = marks.iterator();
103 while(iter.hasNext()) {
104 Annotation lookup = iter.next();
105 FeatureMap lookFeats = lookup.getFeatures();
106 String majorStr = (String) lookFeats.get("majorType");
107 String minorStr = (String) lookFeats.get("minorType");
108 String position = " "+lookup.getStartNode().getOffset()+"-"+ lookup.getEndNode().getOffset();
109 System.out.println(position+":"+majorStr + "." + minorStr + Strings.getNl());
110 }
111 } //if
112 } // void dumpAnnotationSet(AnnotationSet set)
113
114 /** Tests whether the annotation set has the same elements
115 * as statet in DESIRED_ANNOTATIONS
116 * @param marks an annotation set
117 * @return true if they match, false otherwise.
118 */
119 private boolean EqualAnnotationSets(AnnotationSet marks) {
120 boolean areEqual = true;
121 String currentMark = null;
122 int index = 0;
123
124 areEqual = areEqual && (marks.size() == DESIRED_ANNOTATIONS.length);
125
126 if (marks != null) {
127 Iterator<Annotation> iter = marks.iterator();
128
129 while(iter.hasNext() & areEqual) {
130 Annotation lookup = iter.next();
131 FeatureMap lookFeats = lookup.getFeatures();
132 String majorStr = (String) lookFeats.get("majorType");
133 String minorStr = (String) lookFeats.get("minorType");
134 String position = ""+lookup.getStartNode().getOffset()+"-"+ lookup.getEndNode().getOffset();
135
136 currentMark = position+":"+majorStr + "." + minorStr;
137 areEqual = areEqual && (currentMark.equals(DESIRED_ANNOTATIONS[index]));
138 index++;
139 }
140 } else {
141 areEqual = false;
142 } // else
143
144
145 return areEqual;
146 } // boolean testGazAnnotationSet(AnnotationSet marks) {
147
148 private static String [] DESIRED_ANNOTATIONS =
149 {
150 "1067-1072:date_unit.null",
151
152 "1033-1038:person_first.male",
153
154 "1029-1032:title.male",
155
156 "1014-1023:jobtitle.null",
157
158 "1008-1013:jobtitle.null",
159
160 "995-1003:jobtitle.null",
161
162 "846-853:number.null",
163
164 "814-822:date.month",
165
166 "799-802:title.male",
167
168 "765-768:org_ending.null",
169
170 "765-768:cdg.null",
171
172 "753-764:org_key.null",
173
174 "738-741:org_ending.null",
175
176 "738-741:cdg.null",
177
178 "723-737:org_key.null",
179
180 "713-722:organization.company",
181
182 "696-701:cdg.null",
183
184 "677-686:organization.company",
185
186 "664-673:jobtitle.null",
187
188 "658-663:jobtitle.null",
189
190 "645-653:jobtitle.null",
191
192 "636-641:date_unit.null",
193
194 "614-616:stop.null",
195
196 "603-613:organization.company",
197
198 "582-587:cdg.null",
199
200 "555-576:organization.company",
201
202 "546-549:org_ending.null",
203
204 "546-549:cdg.null",
205
206 "529-538:jobtitle.null",
207
208 "523-528:jobtitle.null",
209
210 "510-518:jobtitle.null",
211
212 "484-487:title.male",
213
214 "465-473:jobtitle.null",
215
216 "424-429:person_first.male",
217
218 "414-420:person_first.male",
219
220 "394-399:date_unit.null",
221
222 "379-382:title.male",
223
224 "350-373:jobtitle.null",
225
226 "337-345:jobtitle.null",
227
228 "320-325:person_first.male",
229
230 "295-298:org_ending.null",
231
232 "295-298:cdg.null",
233
234 "274-277:location.province",
235
236 "265-272:location.city",
237
238 "182-189:cdg.null",
239
240 "161-165:person_first.female",
241
242 "100-115:title.civilian",
243
244 "100-115:jobtitle.null",
245
246 "87-95:title.civilian"
247 }; // private static String [] DESIRED_ANNOTATIONS
248 } // TestHashGazetteer
|