001 /*
002 * Copyright (c) 1995-2010, The University of Sheffield. See the file
003 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
004 *
005 * This file is part of GATE (see http://gate.ac.uk/), and is free
006 * software, licenced under the GNU Library General Public License,
007 * Version 2, June 1991 (in the distribution as file licence.html,
008 * and also available at http://gate.ac.uk/gate/licence.html).
009 *
010 * Valentin Tablan 08/05/2008
011 *
012 * $Id: RealtimeCorpusController.java 13299 2010-12-22 11:11:04Z ian_roberts $
013 *
014 */package gate.creole;
015
016 import java.util.Timer;
017 import java.util.TimerTask;
018 import java.util.HashMap;
019 import java.util.concurrent.Callable;
020 import java.util.concurrent.ExecutorService;
021 import java.util.concurrent.Executors;
022 import java.util.concurrent.Future;
023 import java.util.concurrent.ScheduledExecutorService;
024 import java.util.concurrent.ThreadFactory;
025 import java.util.concurrent.TimeUnit;
026 import java.util.concurrent.TimeoutException;
027
028 import org.apache.log4j.Logger;
029
030 import gate.*;
031 import gate.creole.metadata.*;
032 import gate.util.Err;
033 import gate.util.profile.Profiler;
034 import gate.util.Out;
035 /**
036 * A custom GATE controller that interrupts the execution over a document when a
037 * specified amount of time has elapsed. It also ignores all errors/exceptions
038 * that may occur during execution and simply carries on with the next document
039 * when that happens.
040 */
041 @CreoleResource(name = "Real-Time Corpus Pipeline",
042 comment = "A serial controller for PR pipelines over corpora which "
043 + "limits the run time of each PR.",
044 icon = "application-realtime",
045 helpURL = "http://gate.ac.uk/userguide/sec:creole-model:applications")
046 public class RealtimeCorpusController extends SerialAnalyserController {
047
048 private final static boolean DEBUG = false;
049
050 /**
051 * Shared logger object.
052 */
053 private static final Logger logger = Logger.getLogger(
054 RealtimeCorpusController.class);
055
056 /** Profiler to track PR execute time */
057 protected Profiler prof;
058 protected HashMap<String,Long> timeMap;
059
060 /**
061 * An executor service used to execute the PRs over the document .
062 */
063 protected ExecutorService threadSource;
064
065 /**
066 * The tread currently running the document processing.
067 */
068 protected volatile Thread currentWorkingThread;
069
070 public RealtimeCorpusController(){
071 super();
072 if(DEBUG) {
073 prof = new Profiler();
074 prof.enableGCCalling(false);
075 prof.printToSystemOut(true);
076 timeMap = new HashMap<String,Long>();
077 }
078 }
079
080 protected class DocRunner implements Callable<Object>{
081
082 public DocRunner(Document document) {
083 this.document = document;
084 }
085
086 public Object call() {
087 try {
088 // save a reference to the executor thread
089 currentWorkingThread = Thread.currentThread();
090 // run the system over the current document
091 // set the doc and corpus
092 for(int j = 0; j < prList.size(); j++) {
093 ((LanguageAnalyser)prList.get(j)).setDocument(document);
094 ((LanguageAnalyser)prList.get(j)).setCorpus(corpus);
095 }
096 interrupted = false;
097 // execute the PRs
098 // check all the PRs have the right parameters
099 checkParameters();
100 if(DEBUG) {
101 prof.initRun("Execute controller [" + getName() + "]");
102 }
103
104 // execute all PRs in sequence
105 interrupted = false;
106 for(int j = 0; j < prList.size(); j++) {
107 if(isInterrupted())
108 throw new ExecutionInterruptedException("The execution of the "
109 + getName() + " application has been abruptly interrupted!");
110
111 if(Thread.currentThread().isInterrupted()) {
112 Err.println("Execution on document " + document.getName()
113 + " has been stopped");
114 break;
115 }
116
117 try {
118 runComponent(j);
119 }
120 catch(Throwable e) {
121 if(!Thread.currentThread().isInterrupted()) throw e;
122
123 Err.println("Execution on document " + document.getName()
124 + " has been stopped");
125 break;
126 }
127
128 if(DEBUG) {
129 prof.checkPoint("~Execute PR ["
130 + ((ProcessingResource)prList.get(j)).getName() + "]");
131 Long timeOfPR = timeMap.get(((ProcessingResource)prList.get(j))
132 .getName());
133 if(timeOfPR == null)
134 timeMap.put(((ProcessingResource)prList.get(j)).getName(),
135 new Long(prof.getLastDuration()));
136 else timeMap.put(((ProcessingResource)prList.get(j)).getName(),
137 new Long(timeOfPR.longValue() + prof.getLastDuration()));
138 Out.println("Time taken so far by "
139 + ((ProcessingResource)prList.get(j)).getName()
140 + ": "
141 + timeMap.get(((ProcessingResource)prList.get(j)).getName()));
142 }
143 }
144 }
145 catch(ThreadDeath td) {
146 // special case as we need to re-throw this one
147 Err.prln("Execution on document " + document.getName()
148 + " has been stopped");
149 throw (td);
150 }
151 catch(Throwable cause) {
152 Err.prln("Execution on document " + document.getName()
153 + " has caused an error:\n=========================");
154 cause.printStackTrace(Err.getPrintWriter());
155 Err.prln("=========================\nError ignored...\n");
156 }
157 finally {
158 // remove the reference to the thread, as we're now done
159 currentWorkingThread = null;
160 // unset the doc and corpus
161 for(int j = 0; j < prList.size(); j++) {
162 ((LanguageAnalyser)prList.get(j)).setDocument(null);
163 ((LanguageAnalyser)prList.get(j)).setCorpus(null);
164 }
165
166 if(DEBUG) {
167 prof.checkPoint("Execute controller [" + getName() + "] finished");
168 }
169 }
170
171 return null;
172 }
173 private Document document;
174 }
175
176 @Override
177 public void cleanup() {
178 threadSource.shutdownNow();
179 super.cleanup();
180 }
181
182 @Override
183 public Resource init() throws ResourceInstantiationException {
184 // we normally require 2 threads: one to execute the PRs and another one to
185 // to execute the job stoppers. More threads are created as required. We
186 // use a custom ThreadFactory that returns daemon threads so we don't block
187 // GATE from exiting if this controller has not been properly disposed of.
188 threadSource = Executors.newSingleThreadExecutor(new ThreadFactory() {
189 private ThreadFactory dtf = Executors.defaultThreadFactory();
190 public Thread newThread(Runnable r) {
191 Thread t = dtf.newThread(r);
192 t.setDaemon(true);
193 return t;
194 }
195 });
196 return super.init();
197 }
198
199 /** Run the Processing Resources in sequence. */
200 public void executeImpl() throws ExecutionException{
201 interrupted = false;
202 if(corpus == null) throw new ExecutionException(
203 "(SerialAnalyserController) \"" + getName() + "\":\n" +
204 "The corpus supplied for execution was null!");
205 //iterate through the documents in the corpus
206 for(int i = 0; i < corpus.size(); i++){
207 if(isInterrupted()) throw new ExecutionInterruptedException(
208 "The execution of the " + getName() +
209 " application has been abruptly interrupted!");
210
211 boolean docWasLoaded = corpus.isDocumentLoaded(i);
212 Document doc = (Document)corpus.get(i);
213 // start the execution, in the separate thread
214 Future<?> docRunnerFuture = threadSource.submit(new DocRunner(doc));
215 // how long have we already waited
216 long waitSoFar = 0;
217 // check if we should use graceful stop first
218 if (graceful != -1 && (timeout == -1 || graceful < timeout )) {
219 try {
220 docRunnerFuture.get(graceful, TimeUnit.MILLISECONDS);
221 } catch(TimeoutException e) {
222 // we waited the graceful period, and the task did not finish
223 // -> interrupt the job (nicely)
224 waitSoFar += graceful;
225 logger.info("Execution timeout, attempting to gracefully stop worker thread...");
226 // interrupt the working thread - we can't cancel the future as
227 // that would cause future get() calls to fail immediately with
228 // a CancellationException
229 Thread t = currentWorkingThread;
230 if(t != null) {
231 t.interrupt();
232 }
233 for(int j = 0; j < prList.size(); j++){
234 ((Executable)prList.get(j)).interrupt();
235 }
236 // next check scheduled for
237 // - half-time between graceful and timeout, or
238 // - graceful-and-a-half (if no timeout)
239 long waitTime = (timeout != -1) ?
240 (timeout - graceful) / 2 :
241 (graceful / 2);
242 try {
243 docRunnerFuture.get(waitTime, TimeUnit.MILLISECONDS);
244 } catch(TimeoutException e1) {
245 // the mid point has been reached: try nullify
246 waitSoFar += waitTime;
247 logger.info("Execution timeout, attempting to induce exception in order to stop worker thread...");
248 for(int j = 0; j < prList.size(); j++){
249 ((LanguageAnalyser)prList.get(j)).setDocument(null);
250 ((LanguageAnalyser)prList.get(j)).setCorpus(null);
251 }
252 } catch(InterruptedException e1) {
253 // the current thread (not the execution thread!) was interrupted
254 // throw it forward
255 Thread.currentThread().interrupt();
256 } catch(java.util.concurrent.ExecutionException e2) {
257 throw new ExecutionException(e2);
258 }
259 } catch(java.util.concurrent.ExecutionException e) {
260 throw new ExecutionException(e);
261 } catch(InterruptedException e) {
262 // the current thread (not the execution thread!) was interrupted
263 // throw it forward
264 Thread.currentThread().interrupt();
265 }
266 }
267 // wait before we call stop()
268 if(timeout != -1) {
269 long waitTime = timeout - waitSoFar;
270 if(waitTime > 0) {
271 try {
272 docRunnerFuture.get(waitTime, TimeUnit.MILLISECONDS);
273 } catch(TimeoutException e) {
274 // we're out of time: stop the thread
275 logger.info("Execution timeout, worker thread will be forcibly terminated!");
276 // using a volatile variable instead of synchronisation
277 Thread theThread = currentWorkingThread;
278 if(theThread != null) theThread.stop();
279 } catch(InterruptedException e) {
280 // the current thread (not the execution thread!) was interrupted
281 // throw it forward
282 Thread.currentThread().interrupt();
283 } catch(java.util.concurrent.ExecutionException e) {
284 throw new ExecutionException(e);
285 }
286 } else {
287 // stop now!
288 logger.info("Execution timeout, worker thread will be forcibly terminated!");
289 // using a volatile variable instead of synchronisation
290 Thread theThread = currentWorkingThread;
291 if(theThread != null) theThread.stop();
292 }
293 }
294
295 // at this point we finished execution (one way or another)
296 if(!docWasLoaded){
297 //trigger saving
298 getCorpus().unloadDocument(doc);
299 //close the previously unloaded Doc
300 Factory.deleteResource(doc);
301 }
302 }
303 }
304
305
306 /**
307 * The timeout in milliseconds before execution on a document is
308 * forcibly stopped (forcibly stopping execution may result in memory leaks
309 * and/or unexpected behaviour).
310 */
311 protected Long timeout;
312
313 /**
314 * Gets the timeout in milliseconds before execution on a document is
315 * forcibly stopped (forcibly stopping execution may result in memory leaks
316 * and/or unexpected behaviour).
317 * @return
318 */
319 public Long getTimeout() {
320 return timeout;
321 }
322
323
324 /**
325 * Sets the timeout in milliseconds before execution on a document is
326 * forcibly stopped (forcibly stopping execution may result in memory leaks
327 * and/or unexpected behaviour).
328 * @param timeout
329 */
330 @CreoleParameter(defaultValue = "60000",
331 comment = "Timeout in milliseconds before execution on a document is forcibly stopped (forcibly stopping execution may result in memory leaks and/or unexpected behaviour)")
332 public void setTimeout(Long timeout) {
333 this.timeout = timeout;
334 }
335
336 /**
337 * The timeout in milliseconds before execution on a document is
338 * gracefully stopped. Defaults to -1 which disables this functionality and
339 * relies, as previously, on forcibly stopping execution.
340 */
341 protected Long graceful;
342
343 /**
344 * Gets the timeout in milliseconds before execution on a document is
345 * gracefully stopped. Defaults to -1 which disables this functionality and
346 * relies, as previously, on forcibly stopping execution.
347 * @return
348 */
349 public Long getGracefulTimeout() {
350 return graceful;
351 }
352
353 /**
354 * Sets the timeout in milliseconds before execution on a document is
355 * gracefully stopped. Defaults to -1 which disables this functionality and
356 * relies, as previously, on forcibly stopping execution.
357 * @param graceful
358 */
359 @CreoleParameter(defaultValue = "-1",
360 comment = "Timeout in milliseconds before execution on a document is gracefully stopped. Defaults to -1 which disables this functionality and relies, as previously, on forcibly stoping execution.")
361 public void setGracefulTimeout(Long graceful) {
362 this.graceful = graceful;
363 }
364
365 /**
366 * Sleep time in milliseconds while waiting for worker thread to finish.
367 */
368 private static final int POLL_INTERVAL = 50;
369 }
|