001 /*
002 * Batch.java - transducer class
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Hamish Cunningham, 10/08/98
013 *
014 * $Id: Batch.java 13280 2010-12-08 15:09:18Z markagreenwood $
015 *
016 * DEVELOPER NOTES:
017 *
018 * This is one that got away; the relation between constructors,
019 * initTransducer and parseTransducer are totally screwy and get worse
020 * every time I add something (e.g. support for resource loading).
021 * We should probably junk this whole thing and start again....
022 */
023
024 package gate.jape;
025
026 import java.io.IOException;
027 import java.net.URL;
028 import java.util.Collections;
029 import java.util.Iterator;
030 import java.util.Vector;
031
032 import gate.*;
033 import gate.creole.ExecutionException;
034 import gate.creole.ontology.Ontology;
035 import gate.event.ProgressListener;
036 import gate.event.StatusListener;
037 import gate.util.Benchmark;
038 import gate.util.Benchmarkable;
039 import gate.util.Err;
040 import gate.util.Out;
041
042 /** Batch processing of JAPE transducers against documents or collections.
043 * Construction will parse or deserialise a transducer as required.
044 */
045 public class Batch implements JapeConstants, Benchmarkable {
/** Debug flag; not referenced by any code visible in this class. */
private static final boolean DEBUG = false;

/** The name of the transducer file, a .jape or .ser (field disabled). */
// private String japeFileName;

/**
 * The URL that points to a .jape file; set by the public constructors and
 * handed to the parser in {@link #parseJape()}.
 */
private URL japeURL;

/** The encoding used for reading the grammar file(s). */
private String encoding;

/**
 * The JAPE transducer. Assigned in {@link #parseJape()}; most public
 * methods of this class simply forward to it.
 */
private Transducer transducer;
060
061 private ActionContext actionContext;
062
063 public void setActionContext(ActionContext ac) {
064 actionContext = ac;
065 }
066
067 /** A stream connected to the JAPE file (often null). */
068 // private InputStream japeStream = null;
069
/**
 * Creates a non-initialised instance: no URL, no encoding and no transducer.
 * Private, so only code inside this class can use it.
 */
private Batch() {
  super();
}
072
/**
 * Creates a fully initialised instance by parsing the grammar found at the
 * given URL and then wiring up event forwarding from the new transducer.
 *
 * @param url      location of the JAPE grammar to parse
 * @param encoding the encoding used for reading the grammar file(s)
 * @throws JapeException if the grammar cannot be opened or parsed
 */
public Batch(URL url, String encoding) throws JapeException {
  this.encoding = encoding;
  this.japeURL = url;
  parseJape();
  linkListeners();
} // full init constructor
084
/**
 * Creates a fully initialised instance, reporting parse-time status messages
 * to the supplied listener.
 * <P>
 * The listener is registered <I>before</I> the grammar is parsed so that it
 * also receives the status events fired while parsing.
 *
 * @param url       location of the JAPE grammar to parse
 * @param encoding  the encoding used for reading the grammar file(s)
 * @param sListener listener to register on this batch; {@code null} is
 *                  ignored (previously a null entry was stored and caused a
 *                  NullPointerException as soon as a status event was fired)
 * @throws JapeException if the grammar cannot be opened or parsed
 */
public Batch(URL url, String encoding, StatusListener sListener)
    throws JapeException {
  // Never store a null listener: fireStatusChanged() dereferences every entry.
  if (sListener != null) {
    this.addStatusListener(sListener);
  }
  this.japeURL = url;
  this.encoding = encoding;
  parseJape();
  linkListeners();
} // full init constructor
094
095 private void readObject(java.io.ObjectInputStream in)
096 throws IOException, ClassNotFoundException{
097 in.defaultReadObject();
098 //now recreate the listeners
099 linkListeners();
100 }
101
102 /**
103 * Creates inner listeners that forward events from the transducer object
104 * to our own listeners.
105 */
106 protected void linkListeners(){
107 if(transducer != null){
108 transducer.addStatusListener(new StatusListener(){
109 public void statusChanged(String text){
110 fireStatusChanged(text);
111 }
112 });
113
114 transducer.addProgressListener(new ProgressListener(){
115 public void progressChanged(int value){
116 fireProgressChanged(value);
117 }
118
119 public void processFinished(){
120 fireProcessFinished();
121 }
122 });
123 }
124 }
125
126 /**
127 * Notifies this PR that it should stop its execution as soon as possible.
128 */
129 public synchronized void interrupt(){
130 transducer.interrupt();
131 }
132 /** Create a fully initialised instance.
133 * <P><CODE>japeFileName</CODE>: the name of a .jape or .ser transducer
134 * file. This may be an absolute path, or may a .jar
135 * that lives somewhere on the classpath.
136 */
137 /*
138 public Batch(String japeFileName) throws JapeException {
139 this.japeFileName = japeFileName;
140 initTransducer();
141 } // full init constructor
142 */
143 /*
144 public Batch(String japeFileName, StatusListener sListener)
145 throws JapeException {
146 this.japeFileName = japeFileName;
147 this.addStatusListener(sListener);
148 initTransducer();
149 } // full init constructor
150 */
151
152 /** Create a fully initialised instance from an InputStream connected
153 * to the JAPE file.
154 */
155 /*
156 public Batch(InputStream japeStream) throws JapeException {
157 if(japeStream == null)
158 throw new JapeException(
159 "attempt to create a batch parser with null input stream"
160 );
161 this.japeFileName = "stream";
162 this.japeStream = japeStream;
163 initTransducer();
164 } // full init constructor
165 */
166 /** Create a fully initialised instance from a resource path and resource
167 * name.
168 */
169 /*
170 public Batch(String resPath, String resName) throws JapeException {
171 fromResource = true;
172 this.japeFileName = resName;
173 this.resPath = resPath;
174 initTransducer();
175 } // full init constructor
176 */
177
178 /** Get the transducer. */
179 public Transducer getTransducer() { return transducer; }
180
181 /** Instantiate transducer member as necessary. */
182 /*
183 private void initTransducer()
184 throws JapeException {
185 if(fromResource) {
186 parseJape(resPath, japeFileName);
187 } else if(japeFileName.endsWith(".ser") || japeFileName.endsWith(".SER"))
188 deserialiseJape(new File(japeFileName));
189 else if(japeFileName.endsWith(".jape") || japeFileName.endsWith(".JAPE"))
190 parseJape();
191 else if(japeFileName.endsWith(".jar") || japeFileName.endsWith(".JAR"))
192 deserialiseJape();
193 else if(japeFileName.equals("stream"))
194 parseJape(japeStream);
195 else
196 throw new JapeException(
197 "unknown file type (not .jape, .ser or .jar):" + japeFileName
198 );
199 if(transducer != null) transducer.addStatusListener(new StatusListener() {
200 public void statusChanged(String text){
201 fireStatusChangedEvent(text);
202 }
203 });
204 }
205 */
206 /** Parse a jape file from {@link #japeURL} and store the transducer. */
207 private void parseJape() throws JapeException {
208 try {
209 gate.jape.parser.ParseCpsl parser = Factory.newJapeParser(japeURL, encoding);
210
211 StatusListener listener = null;
212 listener = new StatusListener(){
213 public void statusChanged(String text){
214 fireStatusChanged(text);
215 }
216 };
217 parser.addStatusListener(listener);
218 transducer = parser.MultiPhaseTransducer();
219 parser.removeStatusListener(listener);
220 //the call to finish needs to be handled from here now as it
221 //was removed from the .jj file
222 transducer.addStatusListener(listener);
223 transducer.finish();
224 transducer.removeStatusListener(listener);
225
226 } catch (gate.jape.parser.ParseException e) {
227 throw new
228 JapeException("Batch: error parsing transducer: " + e.getMessage());
229 } catch (java.io.IOException e) {
230 throw new
231 JapeException("Batch: couldn't open JAPE file: " + e.getMessage());
232 }
233 } // parseJape
234
235 /** Parse a jape file from an InputStream and store the transducer. */
236 /*
237 private void parseJape(InputStream japeStream) throws JapeException {
238 try {
239 gate.jape.parser.ParseCpsl parser =
240 new gate.jape.parser.ParseCpsl(japeFileName, japeStream);
241 transducer = parser.MultiPhaseTransducer();
242 } catch (gate.jape.parser.ParseException e) {
243 throw new
244 JapeException("Batch: error parsing transducer: " + e.getMessage());
245 } catch (java.io.IOException e) {
246 throw new
247 JapeException("Batch: couldn't read JAPE stream: " + e.getMessage());
248 }
249 } // parseJape(InputStream)
250 */
251 /** Parse a jape file from a resource and store the transducer. */
252 /*
253 private void parseJape(String resPath, String resName) throws JapeException {
254 try {
255 gate.jape.parser.ParseCpsl parser =
256 new gate.jape.parser.ParseCpsl(resPath, resName);
257 transducer = parser.MultiPhaseTransducer();
258 } catch (gate.jape.parser.ParseException e) {
259 throw new
260 JapeException("Batch: error parsing transducer: " + e.getMessage());
261 } catch (java.io.IOException e) {
262 throw new
263 JapeException("Batch: couldn't read JAPE resource: " + e.getMessage());
264 }
265 } // parseJape(resPath, resName)
266 */
267
268 /** Deserialise from a .ser file. */
269 /*
270 private void deserialiseJape(File japeFile) throws JapeException {
271
272 // set up a file input stream
273 FileInputStream japeInputStream = null;
274 try {
275 japeInputStream = new FileInputStream(japeFile.getPath());
276 } catch (IOException e) {
277 throw new JapeException(
278 "Can't read from " + japeFile.getPath() + ": " + e.getMessage()
279 );
280 }
281
282 // call the input stream deserialise method
283 deserialiseJape(japeInputStream);
284 } // deserialiseJape(File)
285 */
286 /** Deserialise from a JAR file. */
287 /*
288 private void deserialiseJape() throws JapeException {
289 // find the jar from CLASSPATH
290 //SearchPath classPath =
291 // new SearchPath(System.getProperty("java.class.path"), ".");
292 File jarFile = new File(japeFileName); //classPath.getFile(japeFileName);
293 if(jarFile == null)
294 throw new JapeException("Batch: can't find " + japeFileName);
295
296 // get a byte array input stream with the .ser in out of the jar file
297 JarFile jar = null;
298 BufferedInputStream japeInputStream = null;
299 try {
300 jar = new JarFile(jarFile.getPath());
301 japeInputStream = new BufferedInputStream(
302 jar.getInputStream(jar.getJarEntry(jarNameToSerName(japeFileName)))
303 );
304 } catch(IOException e) {
305 throw new JapeException("couldn't read jar file " + japeFileName);
306 }
307
308
309 // call the input stream deserialise method
310 deserialiseJape(japeInputStream);
311 } // deserialiseJape()
312 */
313 /** Create a transducer from an object input stream (deserialisation). */
314 /*
315 private void deserialiseJape(InputStream japeInputStream)
316 throws JapeException {
317 try {
318 ObjectInputStream ois = new ObjectInputStream(japeInputStream);
319 transducer = (Transducer) ois.readObject();
320 ois.close();
321 japeInputStream.close(); // redundant?
322 } catch (IOException e) {
323 throw new JapeException(
324 "Batch: can't deserialise InputStream (1): " + e.getMessage()
325 );
326 } catch (ClassNotFoundException e) {
327 throw new JapeException(
328 "Batch: can't deserialise InputStream (2): " + e.getMessage()
329 );
330 }
331 } // deserialise(OIS)
332 */
333 /** Create a .ser name from a .jar name. */
334 /*
335 private String jarNameToSerName(String jarName) {
336 return jarName.substring(0, jarName.length() - 4) + ".ser";
337 } // jarNameToSerName
338 */
339
340 /** Process the given collection. */
341 public void transduce(Corpus coll) throws JapeException, ExecutionException {
342 // for each doc run the transducer
343 Iterator iter = coll.iterator();
344 while(iter.hasNext()) {
345 Document doc = (Document) iter.next();
346 // transducer.transduce(doc);
347 transduce(doc, doc.getAnnotations(), doc.getAnnotations());
348 }
349 } // transduce(coll)
350
351 /** Process a single document. */
352 public void transduce(Document doc) throws JapeException, ExecutionException {
353 transduce(doc, doc.getAnnotations(), doc.getAnnotations());
354 } // transduce(doc)
355
356 /** Process a single document. */
357 public void transduce(Document doc, AnnotationSet inputAS,
358 AnnotationSet outputAS) throws JapeException,
359 ExecutionException {
360 //no need to transduce empty document
361 if (inputAS == null || inputAS.isEmpty())
362 return;
363 transducer.setActionContext(actionContext);
364 transducer.transduce(doc, inputAS, outputAS);
365
366 } // transduce(doc)
367
368 /** Process a single text. */
369 /*
370 public Document transduce(String text) throws JapeException {
371 Document doc = null;
372 try {
373 doc = Factory.newDocument(text);
374 } catch (ResourceInstantiationException e) {
375 throw new JapeException(e.toString());
376 }
377 transducer.transduce(doc, doc.getAnnotations());
378 return doc;
379 } // transduce(text)
380 */
381 /** Process a single file. */
382 /*
383 public Document transduce(File textFile) throws JapeException {
384 String text = null;
385 try {
386 text = gate.util.Files.getString(textFile);
387 } catch(IOException e) { throw new JapeException(e.toString()); }
388 return transduce(text);
389 } // transduce(textFile)
390 */
391 /** Process a set of files. */
392 /*
393 public Corpus transduce(String[] textFileNames) throws JapeException {
394 Corpus coll = null;
395 try {
396 coll = Factory.newCorpus("JAPE batch corpus");
397 Document doc = null;
398 for(int i = 0; i < textFileNames.length; i++) {
399 doc = Factory.newDocument(textFileNames[i]);
400 doc.setFeatures(Factory.newFeatureMap());
401 /*coll.createDocument(
402 textFileNames[i],
403 null, // the text - should get read from disk
404 new AnnotationSetImpl(doc),
405 Factory.newFeatureMap(),
406 Document.COPIED
407 );*/
408 /*
409 transducer.transduce(doc, doc.getAnnotations());
410 }
411 } catch(ResourceInstantiationException e) {
412 throw new JapeException(e.toString());
413 }
414 return coll;
415 } // transduce(textFileNames)
416 */
417 /** This is where it all happens. This is <I>the</I> place to be. Take
418 * your summer holidays here. Visit on Saturday nights. Buy a season
419 * ticket from <CODE>www.programmer.gone.insane.com</CODE>.
420 * <P>
421 * Takes a .jape/.jar/.ser
422 * file name (-j option) which is assumed to hold a pattern
423 * grammar for a multi-phase transducer, and a collection
424 * name (-c option) or a list of files. As needed it then parses and
425 * compiles the transducer, then transduces all the documents in the
426 * collection and saves it to disk.
427 */
428 public static void main(String args[]) {
429 /*
430 // oh great bug in the sky give us this day our daily fuckup
431 //gate.util.Debug.setDebug(true);
432 //gate.util.Debug.setDebug(Rule.class, true);
433 //gate.util.Debug.setDebug(LeftHandSide.class, true);
434 //gate.util.Debug.setDebug(BasicPatternElement.class, true);
435 //gate.util.Debug.setDebug(AnnotationSet.class, true);
436
437 // The persistent name of the collection.
438 String persCollName = null;;
439
440 // The collection to process.
441 Corpus collection = null;
442
443 // create one of us
444 Batch batch = new Batch();
445
446 // process the options
447 int i = 0;
448 for( ; i<args.length; i++) {
449 if(args[i].equals("-c") && ++i < args.length) // -c = coll name
450 persCollName = args[i];
451 else if(args[i].equals("-j") && ++i < args.length)// -j = transducer name
452 batch.japeFileName = args[i];
453 else if(args[i].equals("-v")) // -v = verbose
454 batch.setVerbose(true);
455 else if(args[i].startsWith("-"))
456 batch.usage("unknown option " + args[i]);
457 else
458 break;
459 } // for each arg
460
461 // file name list
462 String[] fileNames = null;
463 if(args.length > i) {
464 fileNames = new String[args.length - i];
465 for(int j = 0; i<args.length; j++, i++)
466 fileNames[j] = args[i];
467 }
468
469 // did they give valid options?
470 if(batch.japeFileName == null)
471 batch.usage("you must supply a transducer name");
472 if(fileNames != null && persCollName != null)
473 batch.usage("can't read a collection AND process a file list");
474
475 // parse the transducer or bomb
476 batch.message("parsing the transducer");
477 try { batch.initTransducer(); }
478 catch(JapeException e) {
479 batch.usage("oops: " + e.toString());
480 }
481
482 Corpus coll = null;
483 if(persCollName != null) { // we got a collection name, not a list of files
484
485 // open the collection or bomb
486 coll = null;
487 batch.message("opening the collection");
488 try {
489 coll = Factory.newCorpus(persCollName);
490 } catch(ResourceInstantiationException e) {
491 batch.usage("oops (x): " + e);
492 }
493
494 // transduce
495 batch.message("calling transducer");
496 try { batch.transduce(coll); }
497 catch(JapeException e) {
498 batch.usage("oops (1): " + e.toString());
499 }
500
501 // save to disk
502 batch.message("saving the collection");
503 batch.usage("couldn't sync coll ");
504
505 // we got a list of files, not a collection
506 } else {
507 batch.message("transducing transient collection");
508 try {
509 coll = batch.transduce(fileNames);
510 } catch(JapeException e) {
511 batch.usage("oops (2): " + e.toString());
512 }
513 }
514
515 // we won! we won! we can smash up all the computers now!
516 batch.message("done");
517 //System.exit(0);
518 */
519 } // main
520
521
522 /** Whether to print progress messages or not. */
523 private boolean verbose = false;
524
525 /** Set verbosity. */
526 public void setVerbose(boolean turtleSoup) { verbose = turtleSoup; }
527
528 /** You got something wrong, dumbo. */
529 public void usage(String errorMessage) {
530 String usageMessage =
531 "usage: java gate.jape.Batch.main [-v] " +
532 "-j japefile(.ser|.jape|.jar) " +
533 "(-c CollectionName | filenames)";
534
535 Err.println(errorMessage);
536 Err.println(usageMessage);
537 // System.exit(1);
538
539 } // usage
540
541 /** Hello? Anybody there?? */
542 public void message(String mess) {
543 if(verbose) Out.println("Batch: " + mess);
544 } // message
545
546 public void setFeatures(gate.FeatureMap newFeatures) {
547 features = newFeatures;
548 }
549 public gate.FeatureMap getFeatures() {
550 return features;
551 }
552 public synchronized void removeProgressListener(ProgressListener l) {
553 if (progressListeners != null && progressListeners.contains(l)) {
554 Vector v = (Vector) progressListeners.clone();
555 v.removeElement(l);
556 progressListeners = v;
557 }
558 }
559 public synchronized void addProgressListener(ProgressListener l) {
560 Vector v = progressListeners == null ? new Vector(2) : (Vector) progressListeners.clone();
561 if (!v.contains(l)) {
562 v.addElement(l);
563 progressListeners = v;
564 }
565 }
566
567 //ProcessProgressReporter implementation ends here
568
569 /** Are we initialising from a resource? */
570 // private boolean fromResource = false;
571
572 /** Path to the resources tree */
573 // private String resPath = null;
574
575
576 private gate.FeatureMap features;
577 private transient Vector progressListeners;
578 private transient Vector statusListeners;
579 private boolean enableDebugging;
580
581 protected void fireProgressChanged(int e) {
582 if (progressListeners != null) {
583 Vector listeners = progressListeners;
584 int count = listeners.size();
585 for (int i = 0; i < count; i++) {
586 ((ProgressListener) listeners.elementAt(i)).progressChanged(e);
587 }
588 }
589 }
590 protected void fireProcessFinished() {
591 if (progressListeners != null) {
592 Vector listeners = progressListeners;
593 int count = listeners.size();
594 for (int i = 0; i < count; i++) {
595 ((ProgressListener) listeners.elementAt(i)).processFinished();
596 }
597 }
598 }
599 public synchronized void removeStatusListener(StatusListener l) {
600 if (statusListeners != null && statusListeners.contains(l)) {
601 Vector v = (Vector) statusListeners.clone();
602 v.removeElement(l);
603 statusListeners = v;
604 }
605 }
606 public synchronized void addStatusListener(StatusListener l) {
607 Vector v = statusListeners == null ? new Vector(2) : (Vector) statusListeners.clone();
608 if (!v.contains(l)) {
609 v.addElement(l);
610 statusListeners = v;
611 }
612 }
613 protected void fireStatusChanged(String e) {
614 if (statusListeners != null) {
615 Vector listeners = statusListeners;
616 int count = listeners.size();
617 for (int i = 0; i < count; i++) {
618 ((StatusListener) listeners.elementAt(i)).statusChanged(e);
619 }
620 }
621 }
622
623 /**
624 * Sets the ontology to be used by the transducers
625 * @param ontology
626 */
627 public void setOntology(gate.creole.ontology.Ontology ontology) {
628 transducer.setOntology(ontology);
629 }
630 public boolean isEnableDebugging() {
631 return enableDebugging;
632 }
633 public void setEnableDebugging(boolean enableDebugging) {
634 this.enableDebugging = enableDebugging;
635 //propagate
636 if(transducer != null) transducer.setEnableDebugging(enableDebugging);
637 }
638
639 /* (non-Javadoc)
640 * @see gate.util.Benchmarkable#getBenchmarkId()
641 */
642 public String getBenchmarkId() {
643 return transducer.getBenchmarkId();
644 }
645
646 /* (non-Javadoc)
647 * @see gate.util.Benchmarkable#setBenchmarkId(java.lang.String)
648 */
649 public void setBenchmarkId(String benchmarkId) {
650 transducer.setBenchmarkId(benchmarkId);
651 }
652
653 public void runControllerExecutionAbortedBlock(
654 ActionContext ac, Controller c, Throwable t, Ontology o) throws ExecutionException {
655 transducer.runControllerExecutionAbortedBlock(ac, c, t, o);
656 }
657
658 public void runControllerExecutionFinishedBlock(
659 ActionContext ac, Controller c, Ontology o) throws ExecutionException {
660 transducer.runControllerExecutionFinishedBlock(ac, c, o);
661 }
662
663 public void runControllerExecutionStartedBlock(
664 ActionContext ac, Controller c, Ontology o) throws ExecutionException {
665 transducer.runControllerExecutionStartedBlock(ac, c, o);
666 }
667
668 /*
669 private void writeObject(ObjectOutputStream oos) throws IOException {
670 Out.prln("writing batch");
671 oos.defaultWriteObject();
672 Out.prln("finished writing batch");
673 } // writeObject
674 */
675
676 } // class Batch
|