DefaultTokeniser.java
001 package gate.creole.tokeniser;
002 
003 import gate.*;
004 import gate.creole.*;
005 import gate.event.ProgressListener;
006 import gate.event.StatusListener;
007 import gate.util.Benchmark;
008 import gate.util.Benchmarkable;
009 import gate.util.Out;
010 
011 /**
012  * A composed tokeniser containing a {@link SimpleTokeniser} and a
013  {@link gate.creole.Transducer}.
014  * The simple tokeniser tokenises the document and the transducer processes its
015  * output.
016  */
017 public class DefaultTokeniser extends AbstractLanguageAnalyser implements Benchmarkable {
018 
019   public static final String
020     DEF_TOK_DOCUMENT_PARAMETER_NAME = "document";
021 
022   public static final String
023     DEF_TOK_ANNOT_SET_PARAMETER_NAME = "annotationSetName";
024 
025   public static final String
026     DEF_TOK_TOKRULES_URL_PARAMETER_NAME = "tokeniserRulesURL";
027 
028   public static final String
029     DEF_TOK_GRAMRULES_URL_PARAMETER_NAME = "transducerGrammarURL";
030 
031   public static final String
032     DEF_TOK_ENCODING_PARAMETER_NAME = "encoding";
033 
034   public DefaultTokeniser() {
035   }
036 
037 
038   /** Initialise this resource, and return it. */
039   public Resource init() throws ResourceInstantiationException{
040     try{
041       //init super object
042       super.init();
043       //create all the componets
044       FeatureMap params;
045       FeatureMap features;
046 
047       params = Factory.newFeatureMap();
048       if(tokeniserRulesURL != null)
049         params.put(SimpleTokeniser.SIMP_TOK_RULES_URL_PARAMETER_NAME,
050                    tokeniserRulesURL);
051       params.put(SimpleTokeniser.SIMP_TOK_ENCODING_PARAMETER_NAME, encoding);
052 
053       if (tokeniser == null) {
054         //tokeniser
055         fireStatusChanged("Creating a tokeniser");
056         if(DEBUGOut.prln("Parameters for the tokeniser: \n" + params);
057         features = Factory.newFeatureMap();
058         Gate.setHiddenAttribute(features, true);
059         tokeniser = (SimpleTokeniser)Factory.createResource(
060                 "gate.creole.tokeniser.SimpleTokeniser",
061                 params, features);
062         tokeniser.setName("Tokeniser " + System.currentTimeMillis());
063       }
064       else {
065         tokeniser.setParameterValues(params);
066         tokeniser.reInit();
067       }
068       
069       fireProgressChanged(50);
070 
071       params = Factory.newFeatureMap();
072       if(transducerGrammarURL != null)
073         params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME,
074                 transducerGrammarURL);
075       params.put(Transducer.TRANSD_ENCODING_PARAMETER_NAME, encoding);
076 
077       if (transducer == null) {
078         //transducer
079         fireStatusChanged("Creating a Jape transducer");
080         if(DEBUGOut.prln("Parameters for the transducer: \n" + params);
081         features = Factory.newFeatureMap();
082         Gate.setHiddenAttribute(features, true);
083         transducer = (Transducer)Factory.createResource("gate.creole.Transducer",
084                 params, features);
085         transducer.setName("Transducer " + System.currentTimeMillis());
086       }
087       else {
088         transducer.setParameterValues(params);
089         transducer.reInit();
090       }
091       fireProgressChanged(100);
092       fireProcessFinished();
093       
094     }catch(ResourceInstantiationException rie){
095       throw rie;
096     }catch(Exception e){
097       throw new ResourceInstantiationException(e);
098     }
099     return this;
100   }
101   
102   public void cleanup() {
103     Factory.deleteResource(transducer);
104     Factory.deleteResource(tokeniser);
105   }
106 
107   public void execute() throws ExecutionException{
108     interrupted = false;
109     //set the parameters
110     try{
111       FeatureMap params = Factory.newFeatureMap();
112       fireProgressChanged(0);
113       //tokeniser
114       params.put(SimpleTokeniser.SIMP_TOK_DOCUMENT_PARAMETER_NAME, document);
115       params.put(
116         SimpleTokeniser.SIMP_TOK_ANNOT_SET_PARAMETER_NAME, annotationSetName);
117       tokeniser.setParameterValues(params);
118 
119       //transducer
120       params.clear();
121       params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, document);
122       params.put(Transducer.TRANSD_INPUT_AS_PARAMETER_NAME, annotationSetName);
123       params.put(Transducer.TRANSD_OUTPUT_AS_PARAMETER_NAME, annotationSetName);
124       transducer.setParameterValues(params);
125     }catch(ResourceInstantiationException rie){
126       throw new ExecutionException(rie);
127     }
128 
129     ProgressListener pListener = null;
130     StatusListener sListener = null;
131     fireProgressChanged(5);
132     pListener = new IntervalProgressListener(550);
133     sListener = new StatusListener(){
134       public void statusChanged(String text){
135         fireStatusChanged(text);
136       }
137     };
138 
139     //tokeniser
140     if(isInterrupted()) throw new ExecutionInterruptedException(
141         "The execution of the \"" + getName() +
142         "\" tokeniser has been abruptly interrupted!");
143     tokeniser.addProgressListener(pListener);
144     tokeniser.addStatusListener(sListener);
145     try{
146       Benchmark.executeWithBenchmarking(tokeniser,
147               Benchmark.createBenchmarkId("simpleTokeniser",
148                       getBenchmarkId()), this, null);
149     }catch(ExecutionInterruptedException eie){
150       throw new ExecutionInterruptedException(
151         "The execution of the \"" + getName() +
152         "\" tokeniser has been abruptly interrupted!");
153     }
154     tokeniser.removeProgressListener(pListener);
155     tokeniser.removeStatusListener(sListener);
156 
157   //transducer
158     if(isInterrupted()) throw new ExecutionInterruptedException(
159         "The execution of the \"" + getName() +
160         "\" tokeniser has been abruptly interrupted!");
161     pListener = new IntervalProgressListener(50100);
162     transducer.addProgressListener(pListener);
163     transducer.addStatusListener(sListener);
164 
165     Benchmark.executeWithBenchmarking(transducer,
166             Benchmark.createBenchmarkId("transducer",
167                     getBenchmarkId()), this, null);
168     transducer.removeProgressListener(pListener);
169     transducer.removeStatusListener(sListener);
170   }//execute
171 
172 
173   /**
174    * Notifies all the PRs in this controller that they should stop their
175    * execution as soon as possible.
176    */
177   public synchronized void interrupt(){
178     interrupted = true;
179     tokeniser.interrupt();
180     transducer.interrupt();
181   }
182 
183   public void setTokeniserRulesURL(java.net.URL tokeniserRulesURL) {
184     this.tokeniserRulesURL = tokeniserRulesURL;
185   }
186   public java.net.URL getTokeniserRulesURL() {
187     return tokeniserRulesURL;
188   }
189   public void setEncoding(String encoding) {
190     this.encoding = encoding;
191   }
192   public String getEncoding() {
193     return encoding;
194   }
195   public void setTransducerGrammarURL(java.net.URL transducerGrammarURL) {
196     this.transducerGrammarURL = transducerGrammarURL;
197   }
198   public java.net.URL getTransducerGrammarURL() {
199     return transducerGrammarURL;
200   }
201  // init()
202 
203   private static final boolean DEBUG = false;
204 
205   /** the simple tokeniser used for tokenisation*/
206   protected SimpleTokeniser tokeniser;
207 
208   /** the transducer used for post-processing*/
209   protected Transducer transducer;
210   private java.net.URL tokeniserRulesURL;
211   private String encoding;
212   private java.net.URL transducerGrammarURL;
213   private String annotationSetName;
214   private String benchmarkId;
215 
216 
217   public void setAnnotationSetName(String annotationSetName) {
218     this.annotationSetName = annotationSetName;
219   }
220   public String getAnnotationSetName() {
221     return annotationSetName;
222   }
223   
224   public void setBenchmarkId(String benchmarkId) {
225     this.benchmarkId = benchmarkId;
226   }
227   
228   public String getBenchmarkId() {
229     if(benchmarkId == null) {
230       return getName();
231     }
232     else {
233       return benchmarkId;
234     }
235   }
236 }