001 /*
002 * Copyright (c) 1995-2010, The University of Sheffield. See the file
003 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
004 *
005 * This file is part of GATE (see http://gate.ac.uk/), and is free
006 * software, licenced under the GNU Library General Public License,
007 * Version 2, June 1991 (in the distribution as file licence.html,
008 * and also available at http://gate.ac.uk/gate/licence.html).
009 *
010 * Valentin Tablan 08/10/2001
011 *
012 * $Id: ConditionalSerialAnalyserController.java 13702 2011-04-19 12:09:54Z ian_roberts $
013 *
014 */
015
016 package gate.creole;
017
018 import java.util.*;
019
020 import gate.*;
021 import gate.creole.metadata.*;
022 import gate.event.CreoleEvent;
023 import gate.util.*;
024
025 /**
026 * This class implements a SerialController that only contains
027 * {@link gate.LanguageAnalyser}s.
028 * It has a {@link gate.Corpus} and its execute method runs all the analysers in
029 * turn over each of the documents in the corpus.
030 * This is a copy of the {@link SerialAnalyserController}, the only difference
031 * being that it inherits from {@link ConditionalSerialController} rather than
032 * from {@link SerialController} which makes it a <b>conditional</b> serial
033 * analyser controller.
034 */
035 @CreoleResource(name = "Conditional Corpus Pipeline",
036 comment = "A serial controller for conditionally run PR pipelines "
037 + "over corpora.",
038 helpURL = "http://gate.ac.uk/userguide/sec:developer:cond")
039 public class ConditionalSerialAnalyserController
040 extends ConditionalSerialController
041 implements CorpusController, LanguageAnalyser {
042
043 /** Debug flag */
044 private static final boolean DEBUG = false;
045
046
047 /**
048 * @return the document
049 */
050 public Document getDocument() {
051 return document;
052 }
053
054 /**
055 * @param document the document to set
056 */
057 @Optional
058 @RunTime
059 @CreoleParameter
060 public void setDocument(Document document) {
061 this.document = document;
062 }
063
064 public gate.Corpus getCorpus() {
065 return corpus;
066 }
067
068 public void setCorpus(gate.Corpus corpus) {
069 this.corpus = corpus;
070 }
071
072 /** Run the Processing Resources in sequence. */
073 protected void executeImpl() throws ExecutionException{
074 interrupted = false;
075 if(corpus == null) throw new ExecutionException(
076 "(ConditionalSerialAnalyserController) \"" + getName() + "\":\n" +
077 "The corpus supplied for execution was null!");
078
079 benchmarkFeatures.put(Benchmark.CORPUS_NAME_FEATURE, corpus.getName());
080
081
082 if(document == null){
083 //running as a top-level controller -> execute over all documents in
084 //sequence
085 // iterate through the documents in the corpus
086 for(int i = 0; i < corpus.size(); i++) {
087 String savedBenchmarkId = getBenchmarkId();
088 try {
089 if(isInterrupted()) {
090 throw new ExecutionInterruptedException("The execution of the "
091 + getName() + " application has been abruptly interrupted!");
092 }
093
094 boolean docWasLoaded = corpus.isDocumentLoaded(i);
095
096 // record the time before loading the document
097 long documentLoadingStartTime = Benchmark.startPoint();
098
099 Document doc = (Document)corpus.get(i);
100
101 // include the document name in the benchmark ID for sub-events
102 setBenchmarkId(Benchmark.createBenchmarkId("doc_" + doc.getName(),
103 getBenchmarkId()));
104 // report the document loading
105 benchmarkFeatures.put(Benchmark.DOCUMENT_NAME_FEATURE, doc.getName());
106 Benchmark.checkPoint(documentLoadingStartTime,
107 Benchmark.createBenchmarkId(Benchmark.DOCUMENT_LOADED,
108 getBenchmarkId()), this, benchmarkFeatures);
109
110 // run the system over this document
111 // set the doc and corpus
112 for(int j = 0; j < prList.size(); j++) {
113 ((LanguageAnalyser)prList.get(j)).setDocument(doc);
114 ((LanguageAnalyser)prList.get(j)).setCorpus(corpus);
115 }
116
117 try {
118 if(DEBUG)
119 Out.pr("SerialAnalyserController processing doc=" + doc.getName()
120 + "...");
121
122 super.executeImpl();
123 if(DEBUG) Out.prln("done.");
124 }
125 finally {
126 // make sure we unset the doc and corpus even if we got an exception
127 for(int j = 0; j < prList.size(); j++) {
128 ((LanguageAnalyser)prList.get(j)).setDocument(null);
129 ((LanguageAnalyser)prList.get(j)).setCorpus(null);
130 }
131 }
132
133 if(!docWasLoaded) {
134 long documentSavingStartTime = Benchmark.startPoint();
135 // trigger saving
136 corpus.unloadDocument(doc);
137 Benchmark.checkPoint(documentSavingStartTime,
138 Benchmark.createBenchmarkId(Benchmark.DOCUMENT_SAVED,
139 getBenchmarkId()), this, benchmarkFeatures);
140
141 // close the previously unloaded Doc
142 Factory.deleteResource(doc);
143 }
144 }
145 finally {
146 setBenchmarkId(savedBenchmarkId);
147 }
148 }
149 }else{
150 //document is set, so we run as a contained controller (i.e. as a compound
151 //Language Analyser
152 // run the system over this document
153 // set the doc and corpus
154 for(int j = 0; j < prList.size(); j++) {
155 ((LanguageAnalyser)prList.get(j)).setDocument(document);
156 ((LanguageAnalyser)prList.get(j)).setCorpus(corpus);
157 }
158
159 try {
160 if(DEBUG)
161 Out.pr("SerialAnalyserController processing doc=" + document.getName()
162 + "...");
163
164 super.executeImpl();
165 if(DEBUG) Out.prln("done.");
166 }
167 finally {
168 // make sure we unset the doc and corpus even if we got an exception
169 for(int j = 0; j < prList.size(); j++) {
170 ((LanguageAnalyser)prList.get(j)).setDocument(null);
171 ((LanguageAnalyser)prList.get(j)).setCorpus(null);
172 }
173 }
174 }//document was not null
175
176
177
178 // //iterate through the documents in the corpus
179 // for(int i = 0; i < corpus.size(); i++){
180 // if(isInterrupted()) throw new ExecutionInterruptedException(
181 // "The execution of the " + getName() +
182 // " application has been abruptly interrupted!");
183 //
184 // boolean docWasLoaded = corpus.isDocumentLoaded(i);
185 //
186 // // record the time before loading the document
187 // long documentLoadingStartTime = Benchmark.startPoint();
188 //
189 // Document doc = (Document)corpus.get(i);
190 //
191 // // report the document loading
192 // benchmarkFeatures.put(Benchmark.DOCUMENT_NAME_FEATURE, doc.getName());
193 // Benchmark.checkPoint(documentLoadingStartTime,
194 // Benchmark.createBenchmarkId(Benchmark.DOCUMENT_LOADED,
195 // getBenchmarkId()), this, benchmarkFeatures);
196 // //run the system over this document
197 // //set the doc and corpus
198 // for(int j = 0; j < prList.size(); j++){
199 // ((LanguageAnalyser)prList.get(j)).setDocument(doc);
200 // ((LanguageAnalyser)prList.get(j)).setCorpus(corpus);
201 // }
202 //
203 // try{
204 // if (DEBUG)
205 // Out.pr("ConditionalSerialAnalyserController processing doc=" + doc.getName()+ "...");
206 // super.executeImpl();
207 // if (DEBUG)
208 // Out.prln("done.");
209 // }
210 // finally {
211 // // make sure we unset the doc and corpus even if we got an exception
212 // for(int j = 0; j < prList.size(); j++){
213 // ((LanguageAnalyser)prList.get(j)).setDocument(null);
214 // ((LanguageAnalyser)prList.get(j)).setCorpus(null);
215 // }
216 // }
217 //
218 // if(!docWasLoaded){
219 // long documentSavingStartTime = Benchmark.startPoint();
220 // // trigger saving
221 // corpus.unloadDocument(doc);
222 // Benchmark.checkPoint(documentSavingStartTime,
223 // Benchmark.createBenchmarkId(Benchmark.DOCUMENT_SAVED,
224 // getBenchmarkId()), this, benchmarkFeatures);
225 // //close the previoulsy unloaded Doc
226 // Factory.deleteResource(doc);
227 // }
228 // }
229 }
230
231 /**
232 * Overidden from {@link SerialController} to only allow
233 * {@link LanguageAnalyser}s as components.
234 */
235 public void add(ProcessingResource pr){
236 checkLanguageAnalyser(pr);
237 super.add(pr);
238 }
239
240 /**
241 * Overidden from {@link SerialController} to only allow
242 * {@link LanguageAnalyser}s as components.
243 */
244 public void add(int index, ProcessingResource pr) {
245 checkLanguageAnalyser(pr);
246 super.add(index, pr);
247 }
248
249 /**
250 * Throw an exception if the given processing resource is not
251 * a LanguageAnalyser.
252 */
253 protected void checkLanguageAnalyser(ProcessingResource pr) {
254 if(!(pr instanceof LanguageAnalyser)) {
255 throw new GateRuntimeException(getClass().getName() +
256 " only accepts " +
257 LanguageAnalyser.class.getName() +
258 "s as components\n" +
259 pr.getClass().getName() +
260 " is not!");
261 }
262 }
263
264 /**
265 * Sets the current document to the memeber PRs
266 */
267 protected void setDocToPrs(Document doc){
268 Iterator prIter = getPRs().iterator();
269 while(prIter.hasNext()){
270 ((LanguageAnalyser)prIter.next()).setDocument(doc);
271 }
272 }
273
274
275 /**
276 * Checks whether all the contained PRs have all the required runtime
277 * parameters set. Ignores the corpus and document parameters as these will
278 * be set at run time.
279 *
280 * @return a {@link List} of {@link ProcessingResource}s that have required
281 * parameters with null values if they exist <tt>null</tt> otherwise.
282 * @throws {@link ResourceInstantiationException} if problems occur while
283 * inspecting the parameters for one of the resources. These will normally be
284 * introspection problems and are usually caused by the lack of a parameter
285 * or of the read accessor for a parameter.
286 */
287 public List getOffendingPocessingResources()
288 throws ResourceInstantiationException{
289 //take all the contained PRs
290 ArrayList badPRs = new ArrayList(getPRs());
291 //remove the ones that no parameters problems
292 Iterator prIter = getPRs().iterator();
293 while(prIter.hasNext()){
294 ProcessingResource pr = (ProcessingResource)prIter.next();
295 ResourceData rData = (ResourceData)Gate.getCreoleRegister().
296 get(pr.getClass().getName());
297 //this is a list of lists
298 List parameters = rData.getParameterList().getRuntimeParameters();
299 //remove corpus and document
300 List newParameters = new ArrayList();
301 Iterator pDisjIter = parameters.iterator();
302 while(pDisjIter.hasNext()){
303 List aDisjunction = (List)pDisjIter.next();
304 List newDisjunction = new ArrayList(aDisjunction);
305 Iterator internalParIter = newDisjunction.iterator();
306 while(internalParIter.hasNext()){
307 Parameter parameter = (Parameter)internalParIter.next();
308 if(parameter.getName().equals("corpus") ||
309 parameter.getName().equals("document")) internalParIter.remove();
310 }
311 if(!newDisjunction.isEmpty()) newParameters.add(newDisjunction);
312 }
313
314 if(AbstractResource.checkParameterValues(pr, newParameters)){
315 badPRs.remove(pr);
316 }
317 }
318 return badPRs.isEmpty() ? null : badPRs;
319 }
320
321
322 protected gate.Corpus corpus;
323
324
325 /**
326 * The document being processed. This is part of the {@link LanguageAnalyser}
327 * interface, so this value is only used when the controller is used as a
328 * member of another controller.
329 */
330 protected Document document;
331
332
333 /**
334 * Overridden to also clean up the corpus value.
335 */
336 public void resourceUnloaded(CreoleEvent e) {
337 super.resourceUnloaded(e);
338 if(e.getResource() == corpus){
339 setCorpus(null);
340 }
341 }
342 }
|