001 package gate.creole.morph;
002
003 import java.io.File;
004 import java.util.*;
005 import junit.framework.*;
006 import gate.*;
007 import gate.creole.*;
008 import gate.creole.tokeniser.DefaultTokeniser;
009 import gate.util.Files;
010 import gate.util.OffsetComparator;
011
012 /**
013 * <p>
014 * Title: TestMorph
015 * </p>
016 * <p>
017 * Description:
018 * </p>
019 * <p>
020 * Copyright: Copyright (c) 2000
021 * </p>
022 * <p>
023 * Company: University Of Sheffield
024 * </p>
025 *
026 * @author not attributable
027 * @version 1.0
028 */
029
030 public class TestMorph extends TestCase {
031
032 private Morph morpher;
033
034 private Document verbDocumentToTest, verbDocumentWithAnswers,
035 nounDocumentToTest, nounDocumentWithAnswers;
036
037 public static int count = 0;
038
039 private DefaultTokeniser tokeniser;
040
041 public TestMorph(String dummy) {
042 super(dummy);
043 }
044
045 /**
046 * This method sets up the parameters for the files to be testes It
047 * initialises the Tokenizer and sets up the other parameters for the morph
048 * program
049 */
050 protected void setUp() {
051 try {
052 // make sure the right plugin is loaded
053 File pluginsHome = new File(System
054 .getProperty(GateConstants.GATE_HOME_PROPERTY_NAME),
055 "plugins");
056 Gate.getCreoleRegister().registerDirectories(new File(pluginsHome, "Tools").toURI().toURL());
057 // creating documents
058 verbDocumentToTest = Factory.newDocument(Files
059 .getGateResource("/gate.ac.uk/tests/morph/verbTest.dat"));
060 verbDocumentWithAnswers = Factory.newDocument(Files
061 .getGateResource("/gate.ac.uk/tests/morph/verbAnswer.dat"));
062 nounDocumentToTest = Factory.newDocument(Files
063 .getGateResource("/gate.ac.uk/tests/morph/nounTest.dat"));
064 nounDocumentWithAnswers = Factory.newDocument(Files
065 .getGateResource("/gate.ac.uk/tests/morph/nounAnswer.dat"));
066 morpher = (Morph) Factory.createResource("gate.creole.morph.Morph");
067 morpher.setAffixFeatureName("affix");
068 morpher.setRootFeatureName("root");
069 tokeniser = (DefaultTokeniser) Factory
070 .createResource("gate.creole.tokeniser.DefaultTokeniser");
071 } catch (Exception rie) {
072 fail("Resources cannot be created");
073 }
074 }
075
076 /**
077 * Test the morpher on verbs, if their roots are identified correctly or not
078 */
079 public void testAll() {
080
081 // run the tokenizer on the verbTestDocument
082 tokeniser.setDocument(verbDocumentToTest);
083 tokeniser.setAnnotationSetName("TokeniserAS");
084 try {
085 tokeniser.execute();
086 } catch (ExecutionException ee) {
087 fail("Error while executing Tokenizer on the test document");
088 }
089
090 // run the tokenizer on the verbAnswerDocument
091 tokeniser.setDocument(verbDocumentWithAnswers);
092 tokeniser.setAnnotationSetName("TokeniserAS");
093 try {
094 tokeniser.execute();
095 } catch (ExecutionException ee) {
096 fail("Error while executing Tokenizer on the test document");
097 }
098
099 // now check if the tokenizer was run properly on the document
100 List<Annotation> queryTokens = new ArrayList<Annotation>(verbDocumentToTest.getAnnotations(
101 "TokeniserAS").get("Token"));
102 Collections.sort(queryTokens, new OffsetComparator());
103
104 // same procedure with the answer document
105 List<Annotation> answerTokens = new ArrayList<Annotation>(verbDocumentWithAnswers
106 .getAnnotations("TokeniserAS").get("Token"));
107 Collections.sort(answerTokens, new OffsetComparator());
108
109 // create iterator to get access to each and every individual token
110 Iterator<Annotation> queryTokensIter = queryTokens.iterator();
111 Iterator<Annotation> answerTokensIter = answerTokens.iterator();
112
113 while (queryTokensIter.hasNext() && answerTokensIter.hasNext()) {
114
115 // get the word to test
116 Annotation currentQueryToken = queryTokensIter.next();
117 String queryTokenValue = (String) (currentQueryToken.getFeatures()
118 .get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME));
119
120 // get the answer of this word
121 Annotation currentAnswerToken = answerTokensIter.next();
122 String answerTokenValue = (String) (currentAnswerToken
123 .getFeatures()
124 .get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME));
125 // run the morpher
126 String rootWord = morpher.findBaseWord(queryTokenValue, "VB");
127 // compare it with the answerTokenValue
128 assertEquals(rootWord, answerTokenValue);
129 }
130
131 // run the tokenizer on the nounTestDocument
132 tokeniser.setDocument(nounDocumentToTest);
133 tokeniser.setAnnotationSetName("TokeniserAS");
134 try {
135 tokeniser.execute();
136 } catch (ExecutionException ee) {
137 fail("Error while executing Tokenizer on the test document");
138 }
139
140 // run the tokenizer on the nounAnswerDocument
141 tokeniser.setDocument(nounDocumentWithAnswers);
142 tokeniser.setAnnotationSetName("TokeniserAS");
143 try {
144 tokeniser.execute();
145 } catch (ExecutionException ee) {
146 fail("Error while executing Tokenizer on the test document");
147 }
148
149 // check both documents are processed correctly by tokeniser
150 assertTrue(!nounDocumentToTest.getAnnotations("TokeniserAS").isEmpty());
151 assertTrue(!nounDocumentWithAnswers.getAnnotations("TokeniserAS")
152 .isEmpty());
153
154
155 // now check if the tokenizer was run properly on the document
156 queryTokens = new ArrayList(nounDocumentToTest.getAnnotations(
157 "TokeniserAS").get("Token"));
158 Comparator offsetComparator = new OffsetComparator();
159 Collections.sort(queryTokens, offsetComparator);
160
161 // same procedure with the answer document
162 answerTokens = new ArrayList(nounDocumentWithAnswers
163 .getAnnotations("TokeniserAS").get("Token"));
164 Collections.sort(answerTokens, offsetComparator);
165
166 // create iterator to get access to each and every individual token
167 queryTokensIter = queryTokens.iterator();
168 answerTokensIter = answerTokens.iterator();
169
170 while (queryTokensIter.hasNext() && answerTokensIter.hasNext()) {
171
172 // get the word to test
173 Annotation currentQueryToken = queryTokensIter.next();
174 String queryTokenValue = (String) (currentQueryToken.getFeatures()
175 .get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME));
176
177 // get the answer of this word
178 Annotation currentAnswerToken = answerTokensIter
179 .next();
180 String answerTokenValue = (String) (currentAnswerToken
181 .getFeatures()
182 .get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME));
183 // run the morpher
184 String rootWord = morpher.findBaseWord(queryTokenValue, "NN");
185
186 // compare it with the answerTokenValue
187 assertEquals(rootWord, answerTokenValue);
188 }
189 }
190
191 public static Test suite() {
192 return new TestSuite(TestMorph.class);
193 }
194 }
|