001 /*
002 * Copyright (c) 1995-2010, The University of Sheffield. See the file
003 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
004 *
005 * This file is part of GATE (see http://gate.ac.uk/), and is free
006 * software, licenced under the GNU Library General Public License,
007 * Version 2, June 1991 (in the distribution as file licence.html,
008 * and also available at http://gate.ac.uk/gate/licence.html).
009 *
010 * Valentin Tablan 17/05/01
011 *
012 * $Id: TestSplitterTagger.java 12006 2009-12-01 17:24:28Z thomas_heitz $
013 */
014 package gate.creole;
015
016 import java.net.URL;
017 import java.util.Iterator;
018
019 import junit.framework.*;
020
021 import gate.*;
022 import gate.corpora.TestDocument;
023 import gate.creole.splitter.SentenceSplitter;
024 import gate.creole.tokeniser.DefaultTokeniser;
025 import gate.util.GateException;
026
027 /**
028 * Test code for the SentenceSplitter and the POS tagger.
029 */
030 public class TestSplitterTagger extends TestCase{
031
032 /** Construction */
033 public TestSplitterTagger(String name) { super(name); }
034
035 /** Fixture set up */
036 public void setUp() throws GateException {
037 } // setUp
038
039 /** Put things back as they should be after running tests
040 * (reinitialise the CREOLE register).
041 */
042 public void tearDown() throws Exception {
043 } // tearDown
044
045 /** Test suite routine for the test runner */
046 public static Test suite() {
047 return new TestSuite(TestSplitterTagger.class);
048 } // suite
049
050
051
052 public void testSplitterTagger() throws Exception{
053 //get a document
054 Document doc = Factory.newDocument(
055 new URL(TestDocument.getTestServerName() + "tests/doc0.html")
056 );
057
058 //tokenise the document
059 //create a tokeniser
060 FeatureMap params = Factory.newFeatureMap();
061 DefaultTokeniser tokeniser = (DefaultTokeniser) Factory.createResource(
062 "gate.creole.tokeniser.DefaultTokeniser", params);
063 //runtime stuff
064 tokeniser.setDocument(doc);
065 tokeniser.setAnnotationSetName("testAS");
066 tokeniser.execute();
067
068
069 //create a splitter
070 params = Factory.newFeatureMap();
071 SentenceSplitter splitter = (SentenceSplitter) Factory.createResource(
072 "gate.creole.splitter.SentenceSplitter", params);
073
074 //runtime stuff
075 splitter.setDocument(doc);
076 splitter.setOutputASName("testAS");
077 splitter.setInputASName("testAS");
078 splitter.execute();
079 assertTrue(!doc.getAnnotations("testAS").
080 get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).isEmpty());
081
082 //now check the tagger
083 //create a tagger
084 params = Factory.newFeatureMap();
085 POSTagger tagger = (POSTagger) Factory.createResource(
086 "gate.creole.POSTagger", params);
087
088 //runtime stuff
089 tagger.setDocument(doc);
090 tagger.setInputASName("testAS");
091 tagger.execute();
092 Iterator<Annotation> tokIter =doc.getAnnotations("testAS").
093 get(ANNIEConstants.TOKEN_ANNOTATION_TYPE).iterator();
094 while(tokIter.hasNext()){
095 Annotation token = tokIter.next();
096 String kind = (String)token.getFeatures().
097 get(ANNIEConstants.TOKEN_KIND_FEATURE_NAME);
098 if(kind.equals(ANNIEConstants.TOKEN_KIND_FEATURE_NAME))
099 assertNotNull(token.getFeatures().
100 get(ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME));
101 }
102 }
103 }
|