TestPR.java
001 /*
002  *  TestPR.java
003  *
004  *  Copyright (c) 1995-2010, The University of Sheffield. See the file
005  *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006  *
007  *  This file is part of GATE (see http://gate.ac.uk/), and is free
008  *  software, licenced under the GNU Library General Public License,
009  *  Version 2, June 1991 (in the distribution as file licence.html,
010  *  and also available at http://gate.ac.uk/gate/licence.html).
011  *
012  *  Oana Hamza,
013  *
014  *  $Id: TestPR.java 13371 2011-01-28 14:01:03Z markagreenwood $
015  */
016 
017 package gate.creole;
018 
019 import java.net.URL;
020 import java.util.*;
021 
022 import junit.framework.*;
023 
024 import gate.*;
025 import gate.corpora.TestDocument;
026 import gate.creole.gazetteer.DefaultGazetteer;
027 import gate.creole.orthomatcher.OrthoMatcher;
028 import gate.creole.splitter.SentenceSplitter;
029 import gate.creole.tokeniser.DefaultTokeniser;
030 import gate.jape.JapeException;
031 import gate.jape.constraint.*;
032 import gate.util.AnnotationDiffer;
033 
034 /** Test the PRs on three documents */
035 public class TestPR extends TestCase
036 {
037   /** Debug flag */
038   private static final boolean DEBUG = false;
039 
040   protected static Document doc1;
041   protected static Document doc2;
042   protected static Document doc3;
043   protected static Document doc4;
044 
045   protected static List<String> annotationTypes = new ArrayList<String>(10);
046 
047   static{
048     annotationTypes.add(ANNIEConstants.SENTENCE_ANNOTATION_TYPE);
049     annotationTypes.add(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE);
050     annotationTypes.add(ANNIEConstants.LOCATION_ANNOTATION_TYPE);
051     annotationTypes.add(ANNIEConstants.PERSON_ANNOTATION_TYPE);
052     annotationTypes.add(ANNIEConstants.DATE_ANNOTATION_TYPE);
053     annotationTypes.add(ANNIEConstants.MONEY_ANNOTATION_TYPE);
054     annotationTypes.add(ANNIEConstants.LOOKUP_ANNOTATION_TYPE);
055     annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE);
056     try{
057       //get 4 documents
058       if (doc1 == null)
059         doc1 = Factory.newDocument(
060             new URL(TestDocument.getTestServerName() +
061                     "tests/ft-bt-03-aug-2001.html"),
062             "ISO-8859-1"
063             );
064 
065       if (doc2 == null)
066         doc2 = Factory.newDocument(
067             new URL(TestDocument.getTestServerName() +
068                     "tests/gu-Am-Brit-4-aug-2001.html"),
069             "ISO-8859-1"
070             );
071 
072       if (doc3 == null)
073         doc3 = Factory.newDocument(
074             new URL(TestDocument.getTestServerName() +
075                     "tests/in-outlook-09-aug-2001.html"),
076             "ISO-8859-1"
077             );
078       if (doc4 == null)
079         doc4 = Factory.newDocument(
080             new URL(TestDocument.getTestServerName() +
081                     "tests/OrthoMatcherTest.txt"),
082             "UTF-8"
083             );
084     }catch(Exception e){
085       e.printStackTrace();
086     }
087   }
088 
089   /** Construction */
090   public TestPR(String name) { super(name)}
091 
092   /** Fixture set up */
093   public void setUp() throws Exception {
094   // setUp
095 
096   /** Put things back as they should be after running tests.
097     */
098   public void tearDown() throws Exception {
099   // tearDown
100 
101   public void testTokenizer() throws Exception {
102     FeatureMap params = Factory.newFeatureMap();
103     DefaultTokeniser tokeniser = (DefaultTokeniserFactory.createResource(
104                     "gate.creole.tokeniser.DefaultTokeniser", params);
105 
106 
107     //run the tokeniser for doc1
108     tokeniser.setDocument(doc1);
109     tokeniser.execute();
110 
111     //run the tokeniser for doc2
112     tokeniser.setDocument(doc2);
113     tokeniser.execute();
114 
115     //run the tokeniser for doc3
116     tokeniser.setDocument(doc3);
117     tokeniser.execute();
118 
119     tokeniser.setDocument(doc4);
120     tokeniser.execute();
121 
122     Factory.deleteResource(tokeniser);
123 
124     // assertions for doc 1
125     assertTrue("Found in "+doc1.getSourceUrl().getFile()" "+
126       doc1.getAnnotations().size() +
127       " Token annotations, instead of the expected 1279.",
128       doc1.getAnnotations().size()== 1279);
129 
130     // assertions for doc 2
131     assertTrue("Found in "+ doc2.getSourceUrl().getFile()" "+
132       doc2.getAnnotations().size() +
133       " Token annotations, instead of the expected 2134.",
134       doc2.getAnnotations().size()== 2134);
135 
136     // assertions for doc 3
137     assertTrue("Found in "+ doc3.getSourceUrl().getFile()" "+
138       doc3.getAnnotations().size() +
139       " Token annotations, instead of the expected 2807.",
140       doc3.getAnnotations().size()== 2807);
141 
142   }// testTokenizer
143 
144   public void testGazetteer() throws Exception {
145     FeatureMap params = Factory.newFeatureMap();
146     DefaultGazetteer gaz = (DefaultGazetteerFactory.createResource(
147                           "gate.creole.gazetteer.DefaultGazetteer", params);
148 
149     //run gazetteer for doc1
150     gaz.setDocument(doc1);
151     gaz.execute();
152 
153     //run gazetteer for doc2
154     gaz.setDocument(doc2);
155     gaz.execute();
156 
157     //run gazetteer for doc3
158     gaz.setDocument(doc3);
159     gaz.execute();
160 
161     //run gazetteer for doc3
162     gaz.setDocument(doc4);
163     gaz.execute();
164 
165 
166     Factory.deleteResource(gaz);
167 
168 //    assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+
169 //      doc1.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size() +
170 //      " Lookup annotations, instead of the expected 60.",
171 //      doc1.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size()== 60);
172     assertEquals("Wrong number of annotations produced in " +
173             doc1.getSourceUrl().getFile(),
174             57,
175             doc1.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size());
176 
177 //    assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+
178 //      doc2.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size() +
179 //      " Lookup annotations, instead of the expected 134.",
180 //      doc2.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size()== 134);
181     assertEquals("Wrong number of annotations produced in " +
182             doc2.getSourceUrl().getFile(),
183             127,
184             doc2.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size());
185 
186 //    assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+
187 //      doc3.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size() +
188 //      " Lookup annotations, instead of the expected 144.",
189 //      doc3.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size()== 144);
190     assertEquals("Wrong number of annotations produced in " +
191             doc3.getSourceUrl().getFile(),
192             139,
193             doc3.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size());
194   }//testGazetteer
195 
196   public void testSplitter() throws Exception {
197     FeatureMap params = Factory.newFeatureMap();
198     SentenceSplitter splitter = (SentenceSplitterFactory.createResource(
199                           "gate.creole.splitter.SentenceSplitter", params);
200 
201     //run splitter for doc1
202     splitter.setDocument(doc1);
203     splitter.execute();
204 
205     //run splitter for doc2
206     splitter.setDocument(doc2);
207     splitter.execute();
208 
209     //run splitter for doc3
210     splitter.setDocument(doc3);
211     splitter.execute();
212 
213     //run splitter for doc3
214     splitter.setDocument(doc4);
215     splitter.execute();
216 
217 
218     Factory.deleteResource(splitter);
219 
220     // assertions for doc 1
221     assertTrue("Found in "+ doc1.getSourceUrl().getFile()" "+
222       doc1.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size() +
223       " Sentence annotations, instead of the expected 21.",
224       doc1.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size()== 21);
225 
226     assertTrue("Found in "+ doc1.getSourceUrl().getFile()" "+
227       doc1.getAnnotations().get("Split").size() +
228       " Split annotations, instead of the expected 38.",
229       doc1.getAnnotations().get("Split").size()== 38);
230 
231     // assertions for doc 2
232     assertTrue("Found in "+ doc2.getSourceUrl().getFile()" "+
233       doc2.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size() +
234       " Sentence annotations, instead of the expected 52.",
235       doc2.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size()== 52);
236 
237     assertTrue("Found in "+ doc2.getSourceUrl().getFile()" "+
238       doc2.getAnnotations().get("Split").size() +
239       " Split annotations, instead of the expected 75.",
240       doc2.getAnnotations().get("Split").size()== 75);
241 
242     // assertions for doc 3
243     assertTrue("Found in "+ doc3.getSourceUrl().getFile()" "+
244       doc3.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size() +
245       " Sentence annotations, instead of the expected 66.",
246       doc3.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size()== 66);
247 
248     assertTrue("Found in "+ doc3.getSourceUrl().getFile()" "+
249       doc3.getAnnotations().get("Split").size() +
250       " Split annotations, instead of the expected 84.",
251       doc3.getAnnotations().get("Split").size()== 84);
252   }//testSplitter
253 
254   public void testTagger() throws Exception {
255     FeatureMap params = Factory.newFeatureMap();
256     POSTagger tagger = (POSTaggerFactory.createResource(
257                           "gate.creole.POSTagger", params);
258 
259 
260     //run the tagger for doc1
261     tagger.setDocument(doc1);
262     tagger.execute();
263 
264     //run the tagger for doc2
265     tagger.setDocument(doc2);
266     tagger.execute();
267 
268     //run the tagger for doc3
269     tagger.setDocument(doc3);
270     tagger.execute();
271 
272     //run the tagger for doc3
273     tagger.setDocument(doc4);
274     tagger.execute();
275 
276     Factory.deleteResource(tagger);
277 
278     HashSet<String> fType = new HashSet<String>();
279     fType.add(ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME);
280 
281     // assertions for doc 1
282     AnnotationSet annots =
283       doc1.getAnnotations().get(ANNIEConstants.TOKEN_ANNOTATION_TYPE, fType);
284 
285     assertTrue("Found in "+ doc1.getSourceUrl().getFile()" "+ annots.size() +
286       " Token annotations with category feature, instead of the expected 675.",
287       annots.size() == 675);
288 
289     // assertions for doc 2
290     annots = doc2.getAnnotations().get(ANNIEConstants.TOKEN_ANNOTATION_TYPE, fType);
291     assertTrue("Found in "+  doc2.getSourceUrl().getFile()" "+annots.size() +
292       " Token annotations with category feature, instead of the expected 1131.",
293       annots.size() == 1131);
294 
295     // assertions for doc 3
296     annots = doc3.getAnnotations().get(ANNIEConstants.TOKEN_ANNOTATION_TYPE, fType);
297     assertTrue("Found in "+ doc3.getSourceUrl().getFile()" "+ annots.size() +
298       " Token annotations with category feature, instead of the expected 1447.",
299       annots.size() == 1447);
300   }//testTagger()
301 
302   public void testTransducer() throws Exception {
303     FeatureMap params = Factory.newFeatureMap();
304     ANNIETransducer transducer = (ANNIETransducerFactory.createResource(
305                           "gate.creole.ANNIETransducer", params);
306 
307     //run the transducer for doc1
308     transducer.setDocument(doc1);
309     transducer.execute();
310 
311     //run the transducer for doc2
312     transducer.setDocument(doc2);
313     transducer.execute();
314 
315     //run the transducer for doc3
316     transducer.setDocument(doc3);
317     transducer.execute();
318 
319     //run the transducer for doc3
320     transducer.setDocument(doc4);
321     transducer.execute();
322 
323     Factory.deleteResource(transducer);
324 
325     // assertions for doc 1
326     assertTrue("Found in "+ doc1.getSourceUrl().getFile()" "+
327       doc1.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size() +
328       " Organization annotations, instead of the expected 26",
329       doc1.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size()== 26);
330     assertTrue("Found in "+doc1.getSourceUrl().getFile()" "+
331       doc1.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size() +
332       " Location annotations, instead of the expected 3",
333       doc1.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size()== 3);
334     assertTrue("Found in "+doc1.getSourceUrl().getFile()" "+
335       doc1.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size() +
336       " Person annotations, instead of the expected 1",
337       doc1.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size()== 1);
338     assertTrue("Found in "+doc1.getSourceUrl().getFile()" "+
339       doc1.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size() +
340       " Date annotations, instead of the expected 7",
341       doc1.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size()== 7);
342     assertTrue("Found in "+doc1.getSourceUrl().getFile()" "+
343       doc1.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size() +
344       " Money annotations, instead of the expected 1",
345       doc1.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size()== 1);
346 
347     // assertions for doc 2
348     assertTrue("Found in "+doc2.getSourceUrl().getFile()" "+
349       doc2.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size() +
350       " Organization annotations, instead of the expected 23",
351       doc2.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size()== 24);
352     assertTrue("Found in "+doc2.getSourceUrl().getFile()" "+
353       doc2.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size() +
354       " Location annotations, instead of the expected 11",
355       doc2.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size()== 11);
356     assertTrue("Found in "+doc2.getSourceUrl().getFile()" "+
357       doc2.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size() +
358       " Person annotations, instead of the expected 1",
359       doc2.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size()== 1);
360     assertTrue("Found in "+doc2.getSourceUrl().getFile()" "+
361       doc2.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size() +
362       " Date annotations, instead of the expected 8",
363       doc2.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size()== 8);
364     assertTrue("Found in "+doc2.getSourceUrl().getFile()" "+
365       doc2.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size() +
366       " Money annotations, instead of the expected 3",
367       doc2.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size()== 3);
368 
369     // assertions for doc 3
370     assertTrue("Found in "+doc3.getSourceUrl().getFile()" "+
371       doc3.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size() +
372       " Organization annotations, instead of the expected 32",
373       doc3.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size()== 32);
374     assertTrue("Found in "+doc3.getSourceUrl().getFile()" "+
375       doc3.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size() +
376       " Location annotations, instead of the expected 11",
377       doc3.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size()== 11);
378     assertTrue("Found in "+doc3.getSourceUrl().getFile()" "+
379       doc3.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size() +
380       " Person annotations, instead of the expected 8",
381       doc3.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size()== 8);
382     assertTrue("Found in "+doc3.getSourceUrl().getFile()" "+
383       doc3.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size() +
384       " Date annotations, instead of the expected 7",
385       doc3.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size()== 7);
386     assertTrue("Found in "+doc3.getSourceUrl().getFile()" "+
387       doc3.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size() +
388       " Money annotations, instead of the expected 4",
389       doc3.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size()== 4);
390 
391     assertEquals("Wrong number of Person annotations in OrthoMatcher test document",22,
392             doc4.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size());
393   }//testTransducer
394 
395   public void testCustomConstraintDefs() throws Exception {
396     FeatureMap params = Factory.newFeatureMap();
397 
398     List<String> operators = new ArrayList<String>();
399     params.put("operators", operators);
400     ConstraintPredicate testPred = new TestConstraintPredicate();
401     operators.add(testPred.getClass().getName());
402 
403     List<String> accessors = new ArrayList<String>();
404     params.put("annotationAccessors", accessors);
405     AnnotationAccessor testAccessor = new TestAnnotationAccessor();
406     accessors.add(testAccessor.getClass().getName());
407 
408     ANNIETransducer transducer = (ANNIETransducerFactory.createResource(
409                           "gate.creole.ANNIETransducer", params);
410 
411     assertEquals(accessors, transducer.getAnnotationAccessors());
412     assertEquals(operators, transducer.getOperators());
413 
414     ConstraintPredicate returnedPred = Factory.getConstraintFactory().createPredicate("fooOp", testAccessor, "fooValue");
415     assertNotNull(returnedPred);
416     assertEquals("Operator not set", testPred.getClass(), returnedPred.getClass());
417 
418     AnnotationAccessor returnAccessor = Factory.getConstraintFactory().createMetaPropertyAccessor("fooProp");
419     assertNotNull(returnAccessor);
420     assertEquals("Accessor not set", testAccessor.getClass(), returnAccessor.getClass());
421   }
422 
423   public void testOrthomatcher() throws Exception {
424     FeatureMap params = Factory.newFeatureMap();
425 
426     OrthoMatcher orthomatcher = (OrthoMatcherFactory.createResource(
427                           "gate.creole.orthomatcher.OrthoMatcher", params);
428 
429 
430     // run the orthomatcher for doc1
431     orthomatcher.setDocument(doc1);
432     orthomatcher.execute();
433 
434     //run the orthomatcher for doc2
435     orthomatcher.setDocument(doc2);
436     orthomatcher.execute();
437 
438     //run the orthomatcher for doc3
439     orthomatcher.setDocument(doc3);
440     orthomatcher.execute();
441 
442     //run the orthomatcher for doc3
443     orthomatcher.setDocument(doc4);
444     orthomatcher.execute();
445 
446     Factory.deleteResource(orthomatcher);
447 
448     HashSet<String> fType = new HashSet<String>();
449     fType.add(ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
450     AnnotationSet annots =
451                   doc1.getAnnotations().get(null,fType);
452 
453 //    assertEquals("Wrong number of annotations with matches feature",
454 //            17, annots.size());
455 
456     annots = doc2.getAnnotations().get(null,fType);
457 //    assertEquals("Wrong number of annotations with matches feature",
458 //            31, annots.size());
459 
460     annots = doc3.getAnnotations().get(null,fType);
461 //    assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+ annots.size() +
462 //      " annotations with matches feature, instead of the expected 39.",
463 //      annots.size() == 39);
464 
465     AnnotationSet personAnnots = doc4.getAnnotations().get("Person");
466     Annotation sarahAnnot = personAnnots.get(new Long(806)new Long(811)).iterator().next();
467     assertEquals("Wrong number of matches for second Sarah in document"2,
468             ((java.util.ArrayListsarahAnnot.getFeatures().get("matches")).size());
469 
470     Annotation robertQJones = personAnnots.get(new Long(300)new Long(315)).iterator().next();
471     assertEquals("Wrong number of matches for Robert Q Jones in document"3,
472             ((java.util.ArrayListrobertQJones.getFeatures().get("matches")).size());
473 
474     Annotation robertCJones = personAnnots.get(new Long(0)new Long(15)).iterator().next();
475     assertEquals("Wrong number of matches for Robert C Jones in document"3,
476             ((java.util.ArrayListrobertCJones.getFeatures().get("matches")).size());
477 
478     Annotation robertAnderson = personAnnots.get(new Long(1188)new Long(1203)).iterator().next();
479     assertEquals("Found a match for Robert Anderson, but he should not have been matched.", false,
480             robertAnderson.getFeatures().containsKey("matches"));
481 
482 
483 
484   }//testOrthomatcher
485 
486   /** A test for comparing the annotation sets*/
487   public void testAllPR() throws Exception {
488 
489     // verify if the saved data store is the same with the just processed file
490     // first document
491     String urlBaseName = Gate.locateGateFiles();
492 //    RE re1 = new RE("build/gate.jar!");
493 //    RE re2 = new RE("jar:");
494 //    urlBaseName = re1.substituteAll( urlBaseName,"classes");
495 //    urlBaseName = re2.substituteAll( urlBaseName,"");
496 
497     if (urlBaseName.endsWith("/bin/gate.jar!/")) {
498       StringBuffer buff = new StringBuffer(
499                             urlBaseName.substring(
500                               0,
501                               urlBaseName.lastIndexOf("bin/gate.jar!/"))
502                             );
503       buff.append("classes/");
504       buff.delete(0"jar:file:".length());
505       buff.insert(0"file://");
506       urlBaseName = buff.toString();
507     }
508 
509     URL urlBase = new URL(urlBaseName + "gate/resources/gate.ac.uk/");
510 
511     URL storageDir = null;
512     storageDir = new URL(urlBase, "tests/ft");
513 
514     //open the data store
515     DataStore ds = Factory.openDataStore
516                     ("gate.persist.SerialDataStore",
517                      storageDir.toExternalForm());
518 
519     //get LR id
520     String lrId = (String)ds.getLrIds
521                                 ("gate.corpora.DocumentImpl").get(0);
522 
523 
524     // get the document from data store
525     FeatureMap features = Factory.newFeatureMap();
526     features.put(DataStore.DATASTORE_FEATURE_NAME, ds);
527     features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
528     Document document = (DocumentFactory.createResource(
529                                       "gate.corpora.DocumentImpl",
530                                       features);
531     compareAnnots(document, doc1);
532 
533     // second document
534     storageDir = null;
535     storageDir = new URL(urlBase, "tests/gu");
536 
537     //open the data store
538     ds = Factory.openDataStore("gate.persist.SerialDataStore",
539                                storageDir.toExternalForm());
540     //get LR id
541     lrId = (String)ds.getLrIds("gate.corpora.DocumentImpl").get(0);
542     // get the document from data store
543     features = Factory.newFeatureMap();
544     features.put(DataStore.DATASTORE_FEATURE_NAME, ds);
545     features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
546     document = (DocumentFactory.createResource(
547                                       "gate.corpora.DocumentImpl",
548                                       features);
549     compareAnnots(document,doc2);
550 
551     // third document
552     storageDir = null;
553     storageDir = new URL(urlBase, "tests/in");
554 
555     //open the data store
556     ds = Factory.openDataStore("gate.persist.SerialDataStore",
557                                storageDir.toExternalForm());
558     //get LR id
559     lrId = (String)ds.getLrIds("gate.corpora.DocumentImpl").get(0);
560     // get the document from data store
561     features = Factory.newFeatureMap();
562     features.put(DataStore.DATASTORE_FEATURE_NAME, ds);
563     features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
564     document = (DocumentFactory.createResource(
565                                 "gate.corpora.DocumentImpl",
566                                 features);
567     compareAnnots(document,doc3);
568   // testAllPR()
569 
570 //  public void compareAnnots1(Document keyDocument, Document responseDocument)
571 //              throws Exception{
572 //    // organization type
573 //    Iterator iteratorTypes = annotationTypes.iterator();
574 //    while (iteratorTypes.hasNext()){
575 //      // get the type of annotation
576 //      String annotType = (String)iteratorTypes.next();
577 //      // create annotation schema
578 //      AnnotationSchema annotationSchema = new AnnotationSchema();
579 //
580 //      annotationSchema.setAnnotationName(annotType);
581 //
582 //      // create an annotation diff
583 //      AnnotationDiff annotDiff = new AnnotationDiff();
584 //      annotDiff.setKeyDocument(keyDocument);
585 //      annotDiff.setResponseDocument(responseDocument);
586 //      annotDiff.setAnnotationSchema(annotationSchema);
587 //      annotDiff.setKeyAnnotationSetName(null);
588 //      annotDiff.setResponseAnnotationSetName(null);
589 //
590 //      Set significantFeatures = new HashSet(Arrays.asList(
591 //                    new String[]{"NMRule", "kind", "orgType", "rule",
592 //                                 "rule1", "rule2", "locType", "gender",
593 //                                 "majorType", "minorType", "category",
594 //                                 "length", "orth", "string", "subkind",
595 //                                 "symbolkind"}));
596 //      annotDiff.setKeyFeatureNamesSet(significantFeatures);
597 //      annotDiff.setTextMode(new Boolean(true));
598 //
599 //      annotDiff.init();
600 //
601 //      if (DEBUG){
602 //        if (annotDiff.getFMeasureAverage() != 1.0) {
603 //          assertTrue("missing annotations " +
604 //            annotDiff.getAnnotationsOfType(AnnotationDiff.MISSING_TYPE)
605 //            + " spurious annotations " +
606 //            annotDiff.getAnnotationsOfType(AnnotationDiff.SPURIOUS_TYPE)
607 //            + " partially-correct annotations " +
608 //            annotDiff.getAnnotationsOfType(
609 //                            AnnotationDiff.PARTIALLY_CORRECT_TYPE),false);
610 //        }
611 //      }//if
612 //
613 //      assertTrue(annotType+ " precision average in "+
614 //        responseDocument.getSourceUrl().getFile()+
615 //        " is "+ annotDiff.getPrecisionAverage()+ " instead of 1.0 ",
616 //        annotDiff.getPrecisionAverage()== 1.0);
617 //      assertTrue(annotType+" recall average in "
618 //        +responseDocument.getSourceUrl().getFile()+
619 //        " is " + annotDiff.getRecallAverage()+ " instead of 1.0 ",
620 //        annotDiff.getRecallAverage()== 1.0);
621 //      assertTrue(annotType+" f-measure average in "
622 //        +responseDocument.getSourceUrl().getFile()+
623 //        " is "+ annotDiff.getFMeasureAverage()+ " instead of 1.0 ",
624 //        annotDiff.getFMeasureAverage()== 1.0);
625 //     }//while
626 //   }// public void compareAnnots
627 //
628    public void compareAnnots(Document keyDocument, Document responseDocument)
629                 throws Exception{
630       // organization type
631       Iterator<String> iteratorTypes = annotationTypes.iterator();
632       while (iteratorTypes.hasNext()){
633         // get the type of annotation
634         String annotType = iteratorTypes.next();
635 
636         // create an annotation diff
637         AnnotationDiffer annotDiffer = new AnnotationDiffer();
638         Set<String> significantFeatures = new HashSet<String>(Arrays.asList(
639                       new String[]{"NMRule""kind""orgType""rule",
640                                    "rule1""rule2""locType""gender",
641                                    "majorType""minorType""category",
642                                    "length""orth""string""subkind",
643                                    "symbolkind"}));
644         annotDiffer.setSignificantFeaturesSet(significantFeatures);
645         annotDiffer.calculateDiff(keyDocument.getAnnotations().get(annotType),
646                                   responseDocument.getAnnotations().get(annotType));
647         if(DEBUGannotDiffer.printMissmatches();
648 
649         assertTrue(annotType+ " precision strict in "+
650           responseDocument.getSourceUrl().getFile()+
651           " is "+ annotDiffer.getPrecisionStrict()" instead of 1.0 ",
652           annotDiffer.getPrecisionStrict()== 1.0);
653 
654         assertTrue(annotType+" recall strict in "
655           +responseDocument.getSourceUrl().getFile()+
656           " is " + annotDiffer.getRecallStrict()" instead of 1.0 ",
657           annotDiffer.getRecallStrict()== 1.0);
658 
659         assertTrue(annotType+" f-measure strict in "
660           +responseDocument.getSourceUrl().getFile()+
661           " is "+ annotDiffer.getFMeasureStrict(0.5)" instead of 1.0 ",
662           annotDiffer.getFMeasureStrict(0.5)== 1.0);
663       }//while
664      }// public void compareAnnots
665 
666    public static class TestConstraintPredicate extends AbstractConstraintPredicate {
667      @Override
668      protected boolean doMatch(Object value, AnnotationSet context)
669              throws JapeException {
670        return false;
671      }
672      public String getOperator() {
673        return "fooOp";
674      }
675    };
676 
677    public static class TestAnnotationAccessor extends MetaPropertyAccessor {
678      public Object getValue(Annotation annot, AnnotationSet context) {
679        return "foo";
680      }
681 
682     @Override
683     public Object getKey() {
684       return "fooProp";
685     }
686    };
687 
688   /** Test suite routine for the test runner */
689   public static Test suite() {
690     return new TestSuite(TestPR.class);
691   // suite
692 
693   public static void main(String[] args) {
694     try{
695       Gate.init();
696       TestPR testPR = new TestPR("");
697       testPR.setUp();
698       testPR.testTokenizer();
699       testPR.testGazetteer();
700       testPR.testSplitter();
701       testPR.testTagger();
702       testPR.testTransducer();
703       testPR.testOrthomatcher();
704       testPR.testAllPR();
705       testPR.tearDown();
706     catch(Exception e) {
707       e.printStackTrace();
708     }
709   // main
} // class TestPR