001 /*
002 * TestPR.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Oana Hamza,
013 *
014 * $Id: TestPR.java 13371 2011-01-28 14:01:03Z markagreenwood $
015 */
016
017 package gate.creole;
018
019 import java.net.URL;
020 import java.util.*;
021
022 import junit.framework.*;
023
024 import gate.*;
025 import gate.corpora.TestDocument;
026 import gate.creole.gazetteer.DefaultGazetteer;
027 import gate.creole.orthomatcher.OrthoMatcher;
028 import gate.creole.splitter.SentenceSplitter;
029 import gate.creole.tokeniser.DefaultTokeniser;
030 import gate.jape.JapeException;
031 import gate.jape.constraint.*;
032 import gate.util.AnnotationDiffer;
033
034 /** Test the PRs on three documents */
035 public class TestPR extends TestCase
036 {
037 /** Debug flag */
038 private static final boolean DEBUG = false;
039
040 protected static Document doc1;
041 protected static Document doc2;
042 protected static Document doc3;
043 protected static Document doc4;
044
045 protected static List<String> annotationTypes = new ArrayList<String>(10);
046
// One-off class initialisation: registers the annotation types that
// compareAnnots() iterates over and loads the four documents shared by all
// test methods. A loading failure is only reported on stderr; documents that
// were not loaded stay null.
static {
  annotationTypes.addAll(Arrays.asList(
      ANNIEConstants.SENTENCE_ANNOTATION_TYPE,
      ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE,
      ANNIEConstants.LOCATION_ANNOTATION_TYPE,
      ANNIEConstants.PERSON_ANNOTATION_TYPE,
      ANNIEConstants.DATE_ANNOTATION_TYPE,
      ANNIEConstants.MONEY_ANNOTATION_TYPE,
      ANNIEConstants.LOOKUP_ANNOTATION_TYPE,
      ANNIEConstants.TOKEN_ANNOTATION_TYPE));

  try {
    // The documents are loaded in order; the first failure aborts the rest.
    doc1 = loadTestDocument("tests/ft-bt-03-aug-2001.html", "ISO-8859-1");
    doc2 = loadTestDocument("tests/gu-Am-Brit-4-aug-2001.html", "ISO-8859-1");
    doc3 = loadTestDocument("tests/in-outlook-09-aug-2001.html", "ISO-8859-1");
    doc4 = loadTestDocument("tests/OrthoMatcherTest.txt", "UTF-8");
  } catch (Exception e) {
    e.printStackTrace();
  }
}

/**
 * Creates a document from a path relative to the test server location.
 *
 * @param relativePath path appended to {@code TestDocument.getTestServerName()}
 * @param encoding     encoding name handed to {@code Factory.newDocument}
 * @return the newly created document
 * @throws Exception if the URL is malformed or the document cannot be created
 */
private static Document loadTestDocument(String relativePath, String encoding)
    throws Exception {
  URL location = new URL(TestDocument.getTestServerName() + relativePath);
  return Factory.newDocument(location, encoding);
}
088
/**
 * Creates a test case bound to the test method with the given name.
 *
 * @param name name of the test method, passed on to the superclass
 */
public TestPR(String name) {
  super(name);
}
091
092 /** Fixture set up */
093 public void setUp() throws Exception {
094 } // setUp
095
096 /** Put things back as they should be after running tests.
097 */
098 public void tearDown() throws Exception {
099 } // tearDown
100
101 public void testTokenizer() throws Exception {
102 FeatureMap params = Factory.newFeatureMap();
103 DefaultTokeniser tokeniser = (DefaultTokeniser) Factory.createResource(
104 "gate.creole.tokeniser.DefaultTokeniser", params);
105
106
107 //run the tokeniser for doc1
108 tokeniser.setDocument(doc1);
109 tokeniser.execute();
110
111 //run the tokeniser for doc2
112 tokeniser.setDocument(doc2);
113 tokeniser.execute();
114
115 //run the tokeniser for doc3
116 tokeniser.setDocument(doc3);
117 tokeniser.execute();
118
119 tokeniser.setDocument(doc4);
120 tokeniser.execute();
121
122 Factory.deleteResource(tokeniser);
123
124 // assertions for doc 1
125 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+
126 doc1.getAnnotations().size() +
127 " Token annotations, instead of the expected 1279.",
128 doc1.getAnnotations().size()== 1279);
129
130 // assertions for doc 2
131 assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+
132 doc2.getAnnotations().size() +
133 " Token annotations, instead of the expected 2134.",
134 doc2.getAnnotations().size()== 2134);
135
136 // assertions for doc 3
137 assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+
138 doc3.getAnnotations().size() +
139 " Token annotations, instead of the expected 2807.",
140 doc3.getAnnotations().size()== 2807);
141
142 }// testTokenizer
143
144 public void testGazetteer() throws Exception {
145 FeatureMap params = Factory.newFeatureMap();
146 DefaultGazetteer gaz = (DefaultGazetteer) Factory.createResource(
147 "gate.creole.gazetteer.DefaultGazetteer", params);
148
149 //run gazetteer for doc1
150 gaz.setDocument(doc1);
151 gaz.execute();
152
153 //run gazetteer for doc2
154 gaz.setDocument(doc2);
155 gaz.execute();
156
157 //run gazetteer for doc3
158 gaz.setDocument(doc3);
159 gaz.execute();
160
161 //run gazetteer for doc3
162 gaz.setDocument(doc4);
163 gaz.execute();
164
165
166 Factory.deleteResource(gaz);
167
168 // assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+
169 // doc1.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size() +
170 // " Lookup annotations, instead of the expected 60.",
171 // doc1.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size()== 60);
172 assertEquals("Wrong number of annotations produced in " +
173 doc1.getSourceUrl().getFile(),
174 57,
175 doc1.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size());
176
177 // assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+
178 // doc2.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size() +
179 // " Lookup annotations, instead of the expected 134.",
180 // doc2.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size()== 134);
181 assertEquals("Wrong number of annotations produced in " +
182 doc2.getSourceUrl().getFile(),
183 127,
184 doc2.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size());
185
186 // assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+
187 // doc3.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size() +
188 // " Lookup annotations, instead of the expected 144.",
189 // doc3.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size()== 144);
190 assertEquals("Wrong number of annotations produced in " +
191 doc3.getSourceUrl().getFile(),
192 139,
193 doc3.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size());
194 }//testGazetteer
195
196 public void testSplitter() throws Exception {
197 FeatureMap params = Factory.newFeatureMap();
198 SentenceSplitter splitter = (SentenceSplitter) Factory.createResource(
199 "gate.creole.splitter.SentenceSplitter", params);
200
201 //run splitter for doc1
202 splitter.setDocument(doc1);
203 splitter.execute();
204
205 //run splitter for doc2
206 splitter.setDocument(doc2);
207 splitter.execute();
208
209 //run splitter for doc3
210 splitter.setDocument(doc3);
211 splitter.execute();
212
213 //run splitter for doc3
214 splitter.setDocument(doc4);
215 splitter.execute();
216
217
218 Factory.deleteResource(splitter);
219
220 // assertions for doc 1
221 assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+
222 doc1.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size() +
223 " Sentence annotations, instead of the expected 21.",
224 doc1.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size()== 21);
225
226 assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+
227 doc1.getAnnotations().get("Split").size() +
228 " Split annotations, instead of the expected 38.",
229 doc1.getAnnotations().get("Split").size()== 38);
230
231 // assertions for doc 2
232 assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+
233 doc2.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size() +
234 " Sentence annotations, instead of the expected 52.",
235 doc2.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size()== 52);
236
237 assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+
238 doc2.getAnnotations().get("Split").size() +
239 " Split annotations, instead of the expected 75.",
240 doc2.getAnnotations().get("Split").size()== 75);
241
242 // assertions for doc 3
243 assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+
244 doc3.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size() +
245 " Sentence annotations, instead of the expected 66.",
246 doc3.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size()== 66);
247
248 assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+
249 doc3.getAnnotations().get("Split").size() +
250 " Split annotations, instead of the expected 84.",
251 doc3.getAnnotations().get("Split").size()== 84);
252 }//testSplitter
253
254 public void testTagger() throws Exception {
255 FeatureMap params = Factory.newFeatureMap();
256 POSTagger tagger = (POSTagger) Factory.createResource(
257 "gate.creole.POSTagger", params);
258
259
260 //run the tagger for doc1
261 tagger.setDocument(doc1);
262 tagger.execute();
263
264 //run the tagger for doc2
265 tagger.setDocument(doc2);
266 tagger.execute();
267
268 //run the tagger for doc3
269 tagger.setDocument(doc3);
270 tagger.execute();
271
272 //run the tagger for doc3
273 tagger.setDocument(doc4);
274 tagger.execute();
275
276 Factory.deleteResource(tagger);
277
278 HashSet<String> fType = new HashSet<String>();
279 fType.add(ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME);
280
281 // assertions for doc 1
282 AnnotationSet annots =
283 doc1.getAnnotations().get(ANNIEConstants.TOKEN_ANNOTATION_TYPE, fType);
284
285 assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+ annots.size() +
286 " Token annotations with category feature, instead of the expected 675.",
287 annots.size() == 675);
288
289 // assertions for doc 2
290 annots = doc2.getAnnotations().get(ANNIEConstants.TOKEN_ANNOTATION_TYPE, fType);
291 assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+annots.size() +
292 " Token annotations with category feature, instead of the expected 1131.",
293 annots.size() == 1131);
294
295 // assertions for doc 3
296 annots = doc3.getAnnotations().get(ANNIEConstants.TOKEN_ANNOTATION_TYPE, fType);
297 assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+ annots.size() +
298 " Token annotations with category feature, instead of the expected 1447.",
299 annots.size() == 1447);
300 }//testTagger()
301
302 public void testTransducer() throws Exception {
303 FeatureMap params = Factory.newFeatureMap();
304 ANNIETransducer transducer = (ANNIETransducer) Factory.createResource(
305 "gate.creole.ANNIETransducer", params);
306
307 //run the transducer for doc1
308 transducer.setDocument(doc1);
309 transducer.execute();
310
311 //run the transducer for doc2
312 transducer.setDocument(doc2);
313 transducer.execute();
314
315 //run the transducer for doc3
316 transducer.setDocument(doc3);
317 transducer.execute();
318
319 //run the transducer for doc3
320 transducer.setDocument(doc4);
321 transducer.execute();
322
323 Factory.deleteResource(transducer);
324
325 // assertions for doc 1
326 assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+
327 doc1.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size() +
328 " Organization annotations, instead of the expected 26",
329 doc1.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size()== 26);
330 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+
331 doc1.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size() +
332 " Location annotations, instead of the expected 3",
333 doc1.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size()== 3);
334 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+
335 doc1.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size() +
336 " Person annotations, instead of the expected 1",
337 doc1.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size()== 1);
338 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+
339 doc1.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size() +
340 " Date annotations, instead of the expected 7",
341 doc1.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size()== 7);
342 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+
343 doc1.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size() +
344 " Money annotations, instead of the expected 1",
345 doc1.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size()== 1);
346
347 // assertions for doc 2
348 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+
349 doc2.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size() +
350 " Organization annotations, instead of the expected 23",
351 doc2.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size()== 24);
352 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+
353 doc2.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size() +
354 " Location annotations, instead of the expected 11",
355 doc2.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size()== 11);
356 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+
357 doc2.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size() +
358 " Person annotations, instead of the expected 1",
359 doc2.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size()== 1);
360 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+
361 doc2.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size() +
362 " Date annotations, instead of the expected 8",
363 doc2.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size()== 8);
364 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+
365 doc2.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size() +
366 " Money annotations, instead of the expected 3",
367 doc2.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size()== 3);
368
369 // assertions for doc 3
370 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+
371 doc3.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size() +
372 " Organization annotations, instead of the expected 32",
373 doc3.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size()== 32);
374 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+
375 doc3.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size() +
376 " Location annotations, instead of the expected 11",
377 doc3.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size()== 11);
378 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+
379 doc3.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size() +
380 " Person annotations, instead of the expected 8",
381 doc3.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size()== 8);
382 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+
383 doc3.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size() +
384 " Date annotations, instead of the expected 7",
385 doc3.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size()== 7);
386 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+
387 doc3.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size() +
388 " Money annotations, instead of the expected 4",
389 doc3.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size()== 4);
390
391 assertEquals("Wrong number of Person annotations in OrthoMatcher test document",22,
392 doc4.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size());
393 }//testTransducer
394
395 public void testCustomConstraintDefs() throws Exception {
396 FeatureMap params = Factory.newFeatureMap();
397
398 List<String> operators = new ArrayList<String>();
399 params.put("operators", operators);
400 ConstraintPredicate testPred = new TestConstraintPredicate();
401 operators.add(testPred.getClass().getName());
402
403 List<String> accessors = new ArrayList<String>();
404 params.put("annotationAccessors", accessors);
405 AnnotationAccessor testAccessor = new TestAnnotationAccessor();
406 accessors.add(testAccessor.getClass().getName());
407
408 ANNIETransducer transducer = (ANNIETransducer) Factory.createResource(
409 "gate.creole.ANNIETransducer", params);
410
411 assertEquals(accessors, transducer.getAnnotationAccessors());
412 assertEquals(operators, transducer.getOperators());
413
414 ConstraintPredicate returnedPred = Factory.getConstraintFactory().createPredicate("fooOp", testAccessor, "fooValue");
415 assertNotNull(returnedPred);
416 assertEquals("Operator not set", testPred.getClass(), returnedPred.getClass());
417
418 AnnotationAccessor returnAccessor = Factory.getConstraintFactory().createMetaPropertyAccessor("fooProp");
419 assertNotNull(returnAccessor);
420 assertEquals("Accessor not set", testAccessor.getClass(), returnAccessor.getClass());
421 }
422
423 public void testOrthomatcher() throws Exception {
424 FeatureMap params = Factory.newFeatureMap();
425
426 OrthoMatcher orthomatcher = (OrthoMatcher) Factory.createResource(
427 "gate.creole.orthomatcher.OrthoMatcher", params);
428
429
430 // run the orthomatcher for doc1
431 orthomatcher.setDocument(doc1);
432 orthomatcher.execute();
433
434 //run the orthomatcher for doc2
435 orthomatcher.setDocument(doc2);
436 orthomatcher.execute();
437
438 //run the orthomatcher for doc3
439 orthomatcher.setDocument(doc3);
440 orthomatcher.execute();
441
442 //run the orthomatcher for doc3
443 orthomatcher.setDocument(doc4);
444 orthomatcher.execute();
445
446 Factory.deleteResource(orthomatcher);
447
448 HashSet<String> fType = new HashSet<String>();
449 fType.add(ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
450 AnnotationSet annots =
451 doc1.getAnnotations().get(null,fType);
452
453 // assertEquals("Wrong number of annotations with matches feature",
454 // 17, annots.size());
455
456 annots = doc2.getAnnotations().get(null,fType);
457 // assertEquals("Wrong number of annotations with matches feature",
458 // 31, annots.size());
459
460 annots = doc3.getAnnotations().get(null,fType);
461 // assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+ annots.size() +
462 // " annotations with matches feature, instead of the expected 39.",
463 // annots.size() == 39);
464
465 AnnotationSet personAnnots = doc4.getAnnotations().get("Person");
466 Annotation sarahAnnot = personAnnots.get(new Long(806), new Long(811)).iterator().next();
467 assertEquals("Wrong number of matches for second Sarah in document", 2,
468 ((java.util.ArrayList) sarahAnnot.getFeatures().get("matches")).size());
469
470 Annotation robertQJones = personAnnots.get(new Long(300), new Long(315)).iterator().next();
471 assertEquals("Wrong number of matches for Robert Q Jones in document", 3,
472 ((java.util.ArrayList) robertQJones.getFeatures().get("matches")).size());
473
474 Annotation robertCJones = personAnnots.get(new Long(0), new Long(15)).iterator().next();
475 assertEquals("Wrong number of matches for Robert C Jones in document", 3,
476 ((java.util.ArrayList) robertCJones.getFeatures().get("matches")).size());
477
478 Annotation robertAnderson = personAnnots.get(new Long(1188), new Long(1203)).iterator().next();
479 assertEquals("Found a match for Robert Anderson, but he should not have been matched.", false,
480 robertAnderson.getFeatures().containsKey("matches"));
481
482
483
484 }//testOrthomatcher
485
486 /** A test for comparing the annotation sets*/
487 public void testAllPR() throws Exception {
488
489 // verify if the saved data store is the same with the just processed file
490 // first document
491 String urlBaseName = Gate.locateGateFiles();
492 // RE re1 = new RE("build/gate.jar!");
493 // RE re2 = new RE("jar:");
494 // urlBaseName = re1.substituteAll( urlBaseName,"classes");
495 // urlBaseName = re2.substituteAll( urlBaseName,"");
496
497 if (urlBaseName.endsWith("/bin/gate.jar!/")) {
498 StringBuffer buff = new StringBuffer(
499 urlBaseName.substring(
500 0,
501 urlBaseName.lastIndexOf("bin/gate.jar!/"))
502 );
503 buff.append("classes/");
504 buff.delete(0, "jar:file:".length());
505 buff.insert(0, "file://");
506 urlBaseName = buff.toString();
507 }
508
509 URL urlBase = new URL(urlBaseName + "gate/resources/gate.ac.uk/");
510
511 URL storageDir = null;
512 storageDir = new URL(urlBase, "tests/ft");
513
514 //open the data store
515 DataStore ds = Factory.openDataStore
516 ("gate.persist.SerialDataStore",
517 storageDir.toExternalForm());
518
519 //get LR id
520 String lrId = (String)ds.getLrIds
521 ("gate.corpora.DocumentImpl").get(0);
522
523
524 // get the document from data store
525 FeatureMap features = Factory.newFeatureMap();
526 features.put(DataStore.DATASTORE_FEATURE_NAME, ds);
527 features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
528 Document document = (Document) Factory.createResource(
529 "gate.corpora.DocumentImpl",
530 features);
531 compareAnnots(document, doc1);
532
533 // second document
534 storageDir = null;
535 storageDir = new URL(urlBase, "tests/gu");
536
537 //open the data store
538 ds = Factory.openDataStore("gate.persist.SerialDataStore",
539 storageDir.toExternalForm());
540 //get LR id
541 lrId = (String)ds.getLrIds("gate.corpora.DocumentImpl").get(0);
542 // get the document from data store
543 features = Factory.newFeatureMap();
544 features.put(DataStore.DATASTORE_FEATURE_NAME, ds);
545 features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
546 document = (Document) Factory.createResource(
547 "gate.corpora.DocumentImpl",
548 features);
549 compareAnnots(document,doc2);
550
551 // third document
552 storageDir = null;
553 storageDir = new URL(urlBase, "tests/in");
554
555 //open the data store
556 ds = Factory.openDataStore("gate.persist.SerialDataStore",
557 storageDir.toExternalForm());
558 //get LR id
559 lrId = (String)ds.getLrIds("gate.corpora.DocumentImpl").get(0);
560 // get the document from data store
561 features = Factory.newFeatureMap();
562 features.put(DataStore.DATASTORE_FEATURE_NAME, ds);
563 features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
564 document = (Document) Factory.createResource(
565 "gate.corpora.DocumentImpl",
566 features);
567 compareAnnots(document,doc3);
568 } // testAllPR()
569
570 // public void compareAnnots1(Document keyDocument, Document responseDocument)
571 // throws Exception{
572 // // organization type
573 // Iterator iteratorTypes = annotationTypes.iterator();
574 // while (iteratorTypes.hasNext()){
575 // // get the type of annotation
576 // String annotType = (String)iteratorTypes.next();
577 // // create annotation schema
578 // AnnotationSchema annotationSchema = new AnnotationSchema();
579 //
580 // annotationSchema.setAnnotationName(annotType);
581 //
582 // // create an annotation diff
583 // AnnotationDiff annotDiff = new AnnotationDiff();
584 // annotDiff.setKeyDocument(keyDocument);
585 // annotDiff.setResponseDocument(responseDocument);
586 // annotDiff.setAnnotationSchema(annotationSchema);
587 // annotDiff.setKeyAnnotationSetName(null);
588 // annotDiff.setResponseAnnotationSetName(null);
589 //
590 // Set significantFeatures = new HashSet(Arrays.asList(
591 // new String[]{"NMRule", "kind", "orgType", "rule",
592 // "rule1", "rule2", "locType", "gender",
593 // "majorType", "minorType", "category",
594 // "length", "orth", "string", "subkind",
595 // "symbolkind"}));
596 // annotDiff.setKeyFeatureNamesSet(significantFeatures);
597 // annotDiff.setTextMode(new Boolean(true));
598 //
599 // annotDiff.init();
600 //
601 // if (DEBUG){
602 // if (annotDiff.getFMeasureAverage() != 1.0) {
603 // assertTrue("missing annotations " +
604 // annotDiff.getAnnotationsOfType(AnnotationDiff.MISSING_TYPE)
605 // + " spurious annotations " +
606 // annotDiff.getAnnotationsOfType(AnnotationDiff.SPURIOUS_TYPE)
607 // + " partially-correct annotations " +
608 // annotDiff.getAnnotationsOfType(
609 // AnnotationDiff.PARTIALLY_CORRECT_TYPE),false);
610 // }
611 // }//if
612 //
613 // assertTrue(annotType+ " precision average in "+
614 // responseDocument.getSourceUrl().getFile()+
615 // " is "+ annotDiff.getPrecisionAverage()+ " instead of 1.0 ",
616 // annotDiff.getPrecisionAverage()== 1.0);
617 // assertTrue(annotType+" recall average in "
618 // +responseDocument.getSourceUrl().getFile()+
619 // " is " + annotDiff.getRecallAverage()+ " instead of 1.0 ",
620 // annotDiff.getRecallAverage()== 1.0);
621 // assertTrue(annotType+" f-measure average in "
622 // +responseDocument.getSourceUrl().getFile()+
623 // " is "+ annotDiff.getFMeasureAverage()+ " instead of 1.0 ",
624 // annotDiff.getFMeasureAverage()== 1.0);
625 // }//while
626 // }// public void compareAnnots
627 //
628 public void compareAnnots(Document keyDocument, Document responseDocument)
629 throws Exception{
630 // organization type
631 Iterator<String> iteratorTypes = annotationTypes.iterator();
632 while (iteratorTypes.hasNext()){
633 // get the type of annotation
634 String annotType = iteratorTypes.next();
635
636 // create an annotation diff
637 AnnotationDiffer annotDiffer = new AnnotationDiffer();
638 Set<String> significantFeatures = new HashSet<String>(Arrays.asList(
639 new String[]{"NMRule", "kind", "orgType", "rule",
640 "rule1", "rule2", "locType", "gender",
641 "majorType", "minorType", "category",
642 "length", "orth", "string", "subkind",
643 "symbolkind"}));
644 annotDiffer.setSignificantFeaturesSet(significantFeatures);
645 annotDiffer.calculateDiff(keyDocument.getAnnotations().get(annotType),
646 responseDocument.getAnnotations().get(annotType));
647 if(DEBUG) annotDiffer.printMissmatches();
648
649 assertTrue(annotType+ " precision strict in "+
650 responseDocument.getSourceUrl().getFile()+
651 " is "+ annotDiffer.getPrecisionStrict()+ " instead of 1.0 ",
652 annotDiffer.getPrecisionStrict()== 1.0);
653
654 assertTrue(annotType+" recall strict in "
655 +responseDocument.getSourceUrl().getFile()+
656 " is " + annotDiffer.getRecallStrict()+ " instead of 1.0 ",
657 annotDiffer.getRecallStrict()== 1.0);
658
659 assertTrue(annotType+" f-measure strict in "
660 +responseDocument.getSourceUrl().getFile()+
661 " is "+ annotDiffer.getFMeasureStrict(0.5)+ " instead of 1.0 ",
662 annotDiffer.getFMeasureStrict(0.5)== 1.0);
663 }//while
664 }// public void compareAnnots
665
666 public static class TestConstraintPredicate extends AbstractConstraintPredicate {
667 @Override
668 protected boolean doMatch(Object value, AnnotationSet context)
669 throws JapeException {
670 return false;
671 }
672 public String getOperator() {
673 return "fooOp";
674 }
675 };
676
677 public static class TestAnnotationAccessor extends MetaPropertyAccessor {
678 public Object getValue(Annotation annot, AnnotationSet context) {
679 return "foo";
680 }
681
682 @Override
683 public Object getKey() {
684 return "fooProp";
685 }
686 };
687
688 /** Test suite routine for the test runner */
689 public static Test suite() {
690 return new TestSuite(TestPR.class);
691 } // suite
692
693 public static void main(String[] args) {
694 try{
695 Gate.init();
696 TestPR testPR = new TestPR("");
697 testPR.setUp();
698 testPR.testTokenizer();
699 testPR.testGazetteer();
700 testPR.testSplitter();
701 testPR.testTagger();
702 testPR.testTransducer();
703 testPR.testOrthomatcher();
704 testPR.testAllPR();
705 testPR.tearDown();
706 } catch(Exception e) {
707 e.printStackTrace();
708 }
709 } // main
710 } // class TestPR
|