001 package gate.creole.tokeniser;
002
003 import gate.*;
004 import gate.creole.*;
005 import gate.event.ProgressListener;
006 import gate.event.StatusListener;
007 import gate.util.Benchmark;
008 import gate.util.Benchmarkable;
009 import gate.util.Out;
010
011 /**
012 * A composed tokeniser containing a {@link SimpleTokeniser} and a
013 * {@link gate.creole.Transducer}.
014 * The simple tokeniser tokenises the document and the transducer processes its
015 * output.
016 */
017 public class DefaultTokeniser extends AbstractLanguageAnalyser implements Benchmarkable {
018
019 public static final String
020 DEF_TOK_DOCUMENT_PARAMETER_NAME = "document";
021
022 public static final String
023 DEF_TOK_ANNOT_SET_PARAMETER_NAME = "annotationSetName";
024
025 public static final String
026 DEF_TOK_TOKRULES_URL_PARAMETER_NAME = "tokeniserRulesURL";
027
028 public static final String
029 DEF_TOK_GRAMRULES_URL_PARAMETER_NAME = "transducerGrammarURL";
030
031 public static final String
032 DEF_TOK_ENCODING_PARAMETER_NAME = "encoding";
033
034 public DefaultTokeniser() {
035 }
036
037
038 /** Initialise this resource, and return it. */
039 public Resource init() throws ResourceInstantiationException{
040 try{
041 //init super object
042 super.init();
043 //create all the componets
044 FeatureMap params;
045 FeatureMap features;
046
047 params = Factory.newFeatureMap();
048 if(tokeniserRulesURL != null)
049 params.put(SimpleTokeniser.SIMP_TOK_RULES_URL_PARAMETER_NAME,
050 tokeniserRulesURL);
051 params.put(SimpleTokeniser.SIMP_TOK_ENCODING_PARAMETER_NAME, encoding);
052
053 if (tokeniser == null) {
054 //tokeniser
055 fireStatusChanged("Creating a tokeniser");
056 if(DEBUG) Out.prln("Parameters for the tokeniser: \n" + params);
057 features = Factory.newFeatureMap();
058 Gate.setHiddenAttribute(features, true);
059 tokeniser = (SimpleTokeniser)Factory.createResource(
060 "gate.creole.tokeniser.SimpleTokeniser",
061 params, features);
062 tokeniser.setName("Tokeniser " + System.currentTimeMillis());
063 }
064 else {
065 tokeniser.setParameterValues(params);
066 tokeniser.reInit();
067 }
068
069 fireProgressChanged(50);
070
071 params = Factory.newFeatureMap();
072 if(transducerGrammarURL != null)
073 params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME,
074 transducerGrammarURL);
075 params.put(Transducer.TRANSD_ENCODING_PARAMETER_NAME, encoding);
076
077 if (transducer == null) {
078 //transducer
079 fireStatusChanged("Creating a Jape transducer");
080 if(DEBUG) Out.prln("Parameters for the transducer: \n" + params);
081 features = Factory.newFeatureMap();
082 Gate.setHiddenAttribute(features, true);
083 transducer = (Transducer)Factory.createResource("gate.creole.Transducer",
084 params, features);
085 transducer.setName("Transducer " + System.currentTimeMillis());
086 }
087 else {
088 transducer.setParameterValues(params);
089 transducer.reInit();
090 }
091 fireProgressChanged(100);
092 fireProcessFinished();
093
094 }catch(ResourceInstantiationException rie){
095 throw rie;
096 }catch(Exception e){
097 throw new ResourceInstantiationException(e);
098 }
099 return this;
100 }
101
102 public void cleanup() {
103 Factory.deleteResource(transducer);
104 Factory.deleteResource(tokeniser);
105 }
106
107 public void execute() throws ExecutionException{
108 interrupted = false;
109 //set the parameters
110 try{
111 FeatureMap params = Factory.newFeatureMap();
112 fireProgressChanged(0);
113 //tokeniser
114 params.put(SimpleTokeniser.SIMP_TOK_DOCUMENT_PARAMETER_NAME, document);
115 params.put(
116 SimpleTokeniser.SIMP_TOK_ANNOT_SET_PARAMETER_NAME, annotationSetName);
117 tokeniser.setParameterValues(params);
118
119 //transducer
120 params.clear();
121 params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, document);
122 params.put(Transducer.TRANSD_INPUT_AS_PARAMETER_NAME, annotationSetName);
123 params.put(Transducer.TRANSD_OUTPUT_AS_PARAMETER_NAME, annotationSetName);
124 transducer.setParameterValues(params);
125 }catch(ResourceInstantiationException rie){
126 throw new ExecutionException(rie);
127 }
128
129 ProgressListener pListener = null;
130 StatusListener sListener = null;
131 fireProgressChanged(5);
132 pListener = new IntervalProgressListener(5, 50);
133 sListener = new StatusListener(){
134 public void statusChanged(String text){
135 fireStatusChanged(text);
136 }
137 };
138
139 //tokeniser
140 if(isInterrupted()) throw new ExecutionInterruptedException(
141 "The execution of the \"" + getName() +
142 "\" tokeniser has been abruptly interrupted!");
143 tokeniser.addProgressListener(pListener);
144 tokeniser.addStatusListener(sListener);
145 try{
146 Benchmark.executeWithBenchmarking(tokeniser,
147 Benchmark.createBenchmarkId("simpleTokeniser",
148 getBenchmarkId()), this, null);
149 }catch(ExecutionInterruptedException eie){
150 throw new ExecutionInterruptedException(
151 "The execution of the \"" + getName() +
152 "\" tokeniser has been abruptly interrupted!");
153 }
154 tokeniser.removeProgressListener(pListener);
155 tokeniser.removeStatusListener(sListener);
156
157 //transducer
158 if(isInterrupted()) throw new ExecutionInterruptedException(
159 "The execution of the \"" + getName() +
160 "\" tokeniser has been abruptly interrupted!");
161 pListener = new IntervalProgressListener(50, 100);
162 transducer.addProgressListener(pListener);
163 transducer.addStatusListener(sListener);
164
165 Benchmark.executeWithBenchmarking(transducer,
166 Benchmark.createBenchmarkId("transducer",
167 getBenchmarkId()), this, null);
168 transducer.removeProgressListener(pListener);
169 transducer.removeStatusListener(sListener);
170 }//execute
171
172
173 /**
174 * Notifies all the PRs in this controller that they should stop their
175 * execution as soon as possible.
176 */
177 public synchronized void interrupt(){
178 interrupted = true;
179 tokeniser.interrupt();
180 transducer.interrupt();
181 }
182
183 public void setTokeniserRulesURL(java.net.URL tokeniserRulesURL) {
184 this.tokeniserRulesURL = tokeniserRulesURL;
185 }
186 public java.net.URL getTokeniserRulesURL() {
187 return tokeniserRulesURL;
188 }
189 public void setEncoding(String encoding) {
190 this.encoding = encoding;
191 }
192 public String getEncoding() {
193 return encoding;
194 }
195 public void setTransducerGrammarURL(java.net.URL transducerGrammarURL) {
196 this.transducerGrammarURL = transducerGrammarURL;
197 }
198 public java.net.URL getTransducerGrammarURL() {
199 return transducerGrammarURL;
200 }
201 // init()
202
203 private static final boolean DEBUG = false;
204
205 /** the simple tokeniser used for tokenisation*/
206 protected SimpleTokeniser tokeniser;
207
208 /** the transducer used for post-processing*/
209 protected Transducer transducer;
210 private java.net.URL tokeniserRulesURL;
211 private String encoding;
212 private java.net.URL transducerGrammarURL;
213 private String annotationSetName;
214 private String benchmarkId;
215
216
217 public void setAnnotationSetName(String annotationSetName) {
218 this.annotationSetName = annotationSetName;
219 }
220 public String getAnnotationSetName() {
221 return annotationSetName;
222 }
223
224 public void setBenchmarkId(String benchmarkId) {
225 this.benchmarkId = benchmarkId;
226 }
227
228 public String getBenchmarkId() {
229 if(benchmarkId == null) {
230 return getName();
231 }
232 else {
233 return benchmarkId;
234 }
235 }
236 }
|