001 /*
002 * Copyright (c) 1995-2010, The University of Sheffield. See the file
003 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
004 *
005 * This file is part of GATE (see http://gate.ac.uk/), and is free
006 * software, licenced under the GNU Library General Public License,
007 * Version 2, June 1991 (in the distribution as file licence.html,
008 * and also available at http://gate.ac.uk/gate/licence.html).
009 *
010 * Valentin Tablan 08/10/2001
011 *
012 * $Id: SerialAnalyserController.java 13702 2011-04-19 12:09:54Z ian_roberts $
013 *
014 */
015
016 package gate.creole;
017
018 import java.util.*;
019
020 import gate.*;
021 import gate.creole.metadata.*;
022 import gate.event.CreoleEvent;
023 import gate.util.*;
024
025 /**
026 * This class implements a SerialController that only contains
027 * {@link gate.LanguageAnalyser}s. It has a {@link gate.Corpus} and its execute
028 * method runs all the analysers in turn over each of the documents in the
029 * corpus.
030 */
031 @CreoleResource(name = "Corpus Pipeline",
032 comment = "A serial controller for PR pipelines over corpora.",
033 helpURL = "http://gate.ac.uk/userguide/sec:developer:apps")
034 public class SerialAnalyserController extends SerialController
035 implements CorpusController, LanguageAnalyser {
036
037 /** Debug flag */
038 private static final boolean DEBUG = false;
039
040 /**
041 * @return the document
042 */
043 public Document getDocument() {
044 return document;
045 }
046
047 /**
048 * @param document the document to set
049 */
050 @Optional
051 @RunTime
052 @CreoleParameter
053 public void setDocument(Document document) {
054 this.document = document;
055 }
056
057 public gate.Corpus getCorpus() {
058 return corpus;
059 }
060
061 public void setCorpus(gate.Corpus corpus) {
062 this.corpus = corpus;
063 }
064
065 /** Run the Processing Resources in sequence. */
066 protected void executeImpl() throws ExecutionException {
067 interrupted = false;
068 if(corpus == null)
069 throw new ExecutionException("(SerialAnalyserController) \"" + getName()
070 + "\":\n" + "The corpus supplied for execution was null!");
071
072 benchmarkFeatures.put(Benchmark.CORPUS_NAME_FEATURE, corpus.getName());
073
074 // reset the prTimeMap that keeps track of the time
075 // taken by each PR to process the entire corpus
076 super.resetPrTimeMap();
077
078 if(document == null){
079 //running as a top-level controller -> execute over all documents in
080 //sequence
081 // iterate through the documents in the corpus
082 for(int i = 0; i < corpus.size(); i++) {
083 String savedBenchmarkId = getBenchmarkId();
084 try {
085 if(isInterrupted()) {
086 throw new ExecutionInterruptedException("The execution of the "
087 + getName() + " application has been abruptly interrupted!");
088 }
089
090 boolean docWasLoaded = corpus.isDocumentLoaded(i);
091
092 // record the time before loading the document
093 long documentLoadingStartTime = Benchmark.startPoint();
094
095 Document doc = (Document)corpus.get(i);
096
097 // include the document name in the benchmark ID for sub-events
098 setBenchmarkId(Benchmark.createBenchmarkId("doc_" + doc.getName(),
099 getBenchmarkId()));
100 // report the document loading
101 benchmarkFeatures.put(Benchmark.DOCUMENT_NAME_FEATURE, doc.getName());
102 Benchmark.checkPoint(documentLoadingStartTime,
103 Benchmark.createBenchmarkId(Benchmark.DOCUMENT_LOADED,
104 getBenchmarkId()), this, benchmarkFeatures);
105
106 // run the system over this document
107 // set the doc and corpus
108 for(int j = 0; j < prList.size(); j++) {
109 ((LanguageAnalyser)prList.get(j)).setDocument(doc);
110 ((LanguageAnalyser)prList.get(j)).setCorpus(corpus);
111 }
112
113 try {
114 if(DEBUG)
115 Out.pr("SerialAnalyserController processing doc=" + doc.getName()
116 + "...");
117
118 super.executeImpl();
119 if(DEBUG) Out.prln("done.");
120 }
121 finally {
122 // make sure we unset the doc and corpus even if we got an exception
123 for(int j = 0; j < prList.size(); j++) {
124 ((LanguageAnalyser)prList.get(j)).setDocument(null);
125 ((LanguageAnalyser)prList.get(j)).setCorpus(null);
126 }
127 }
128
129 if(!docWasLoaded) {
130 long documentSavingStartTime = Benchmark.startPoint();
131 // trigger saving
132 corpus.unloadDocument(doc);
133 Benchmark.checkPoint(documentSavingStartTime,
134 Benchmark.createBenchmarkId(Benchmark.DOCUMENT_SAVED,
135 getBenchmarkId()), this, benchmarkFeatures);
136
137 // close the previoulsy unloaded Doc
138 Factory.deleteResource(doc);
139 }
140 }
141 finally {
142 setBenchmarkId(savedBenchmarkId);
143 }
144 }
145 }else{
146 //document is set, so we run as a contained controller (i.e. as a compound
147 //Language Analyser
148 // run the system over this document
149 // set the doc and corpus
150 for(int j = 0; j < prList.size(); j++) {
151 ((LanguageAnalyser)prList.get(j)).setDocument(document);
152 ((LanguageAnalyser)prList.get(j)).setCorpus(corpus);
153 }
154
155 try {
156 if(DEBUG)
157 Out.pr("SerialAnalyserController processing doc=" + document.getName()
158 + "...");
159
160 super.executeImpl();
161 if(DEBUG) Out.prln("done.");
162 }
163 finally {
164 // make sure we unset the doc and corpus even if we got an exception
165 for(int j = 0; j < prList.size(); j++) {
166 ((LanguageAnalyser)prList.get(j)).setDocument(null);
167 ((LanguageAnalyser)prList.get(j)).setCorpus(null);
168 }
169 }
170 }//document was not null
171
172
173
174 // remove the features that we added
175 benchmarkFeatures.remove(Benchmark.DOCUMENT_NAME_FEATURE);
176 benchmarkFeatures.remove(Benchmark.CORPUS_NAME_FEATURE);
177 }
178
179 /**
180 * Overidden from {@link SerialController} to only allow
181 * {@link LanguageAnalyser}s as components.
182 */
183 public void add(ProcessingResource pr){
184 checkLanguageAnalyser(pr);
185 super.add(pr);
186 }
187
188 /**
189 * Overidden from {@link SerialController} to only allow
190 * {@link LanguageAnalyser}s as components.
191 */
192 public void add(int index, ProcessingResource pr) {
193 checkLanguageAnalyser(pr);
194 super.add(index, pr);
195 }
196
197 /**
198 * Throw an exception if the given processing resource is not
199 * a LanguageAnalyser.
200 */
201 protected void checkLanguageAnalyser(ProcessingResource pr) {
202 if(!(pr instanceof LanguageAnalyser)) {
203 throw new GateRuntimeException(getClass().getName() +
204 " only accepts " +
205 LanguageAnalyser.class.getName() +
206 "s as components\n" +
207 pr.getClass().getName() +
208 " is not!");
209 }
210 }
211
212 /**
213 * Sets the current document to the memeber PRs
214 */
215 protected void setDocToPrs(Document doc) {
216 Iterator prIter = getPRs().iterator();
217 while(prIter.hasNext()) {
218 ((LanguageAnalyser)prIter.next()).setDocument(doc);
219 }
220 }
221
222 /**
223 * Checks whether all the contained PRs have all the required runtime
224 * parameters set. Ignores the corpus and document parameters as these will be
225 * set at run time.
226 *
227 * @return a {@link List} of {@link ProcessingResource}s that have required
228 * parameters with null values if they exist <tt>null</tt>
229 * otherwise.
230 * @throws {@link ResourceInstantiationException}
231 * if problems occur while inspecting the parameters for one of the
232 * resources. These will normally be introspection problems and are
233 * usually caused by the lack of a parameter or of the read accessor
234 * for a parameter.
235 */
236 public List getOffendingPocessingResources()
237 throws ResourceInstantiationException {
238 // take all the contained PRs
239 ArrayList badPRs = new ArrayList(getPRs());
240 // remove the ones that no parameters problems
241 Iterator prIter = getPRs().iterator();
242 while(prIter.hasNext()) {
243 ProcessingResource pr = (ProcessingResource)prIter.next();
244 ResourceData rData =
245 (ResourceData)Gate.getCreoleRegister().get(pr.getClass().getName());
246 // this is a list of lists
247 List parameters = rData.getParameterList().getRuntimeParameters();
248 // remove corpus and document
249 List newParameters = new ArrayList();
250 Iterator pDisjIter = parameters.iterator();
251 while(pDisjIter.hasNext()) {
252 List aDisjunction = (List)pDisjIter.next();
253 List newDisjunction = new ArrayList(aDisjunction);
254 Iterator internalParIter = newDisjunction.iterator();
255 while(internalParIter.hasNext()) {
256 Parameter parameter = (Parameter)internalParIter.next();
257 if(parameter.getName().equals("corpus")
258 || parameter.getName().equals("document"))
259 internalParIter.remove();
260 }
261 if(!newDisjunction.isEmpty()) newParameters.add(newDisjunction);
262 }
263
264 if(AbstractResource.checkParameterValues(pr, newParameters)) {
265 badPRs.remove(pr);
266 }
267 }
268 return badPRs.isEmpty() ? null : badPRs;
269 }
270
271
272 /**
273 * The corpus being processed by this controller.
274 */
275 protected gate.Corpus corpus;
276
277
278 /**
279 * The document being processed. This is part of the {@link LanguageAnalyser}
280 * interface, so this value is only used when the controller is used as a
281 * member of another controller.
282 */
283 protected Document document;
284
285
286 /**
287 * Overridden to also clean up the corpus value.
288 */
289 public void resourceUnloaded(CreoleEvent e) {
290 super.resourceUnloaded(e);
291 if(e.getResource() == corpus) {
292 setCorpus(null);
293 }
294 }
295 }
|