All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gate.creole.SerialAnalyserController Maven / Gradle / Ivy

Go to download

GATE - general achitecture for text engineering - is open source software capable of solving almost any text processing problem. This artifact enables you to embed the core GATE Embedded with its essential dependencies. You will able to use the GATE Embedded API and load and store GATE XML documents. This artifact is the perfect dependency for CREOLE plugins or for applications that need to customize the GATE dependencies due to confict with their own dependencies or for lower footprint.

The newest version!
/*
 *  Copyright (c) 1995-2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Valentin Tablan 08/10/2001
 *
 *  $Id: SerialAnalyserController.java 18563 2015-02-09 16:38:02Z johann_p $
 *
 */

package gate.creole;

import gate.Controller;
import gate.CorpusController;
import gate.Document;
import gate.Factory;
import gate.Gate;
import gate.LanguageAnalyser;
import gate.ProcessingResource;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.event.CreoleEvent;
import gate.util.Benchmark;
import gate.util.GateRuntimeException;
import gate.util.Out;

import java.lang.reflect.UndeclaredThrowableException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * This class implements a SerialController that only contains
 * {@link gate.LanguageAnalyser}s. It has a {@link gate.Corpus} and its execute
 * method runs all the analysers in turn over each of the documents in the
 * corpus.
 * 

* NOTE: if at the time when execute() is invoked, the document is not null, * it is assumed that this controller is invoked from another controller and * only this document is processed while the corpus (which must still be * non-null) is ignored. Also, if the document is not null, the CorpusAwarePRs * are not notified at the beginning, end, or abnormal termination of the pipeline. *

* If the document is null, all documents in the corpus * are processed in sequence and CorpusAwarePRs are notified * before the processing of the documents and after all documents * have been processed or an abnormal termination occurred. * */ @CreoleResource(name = "Corpus Pipeline", comment = "A serial controller for PR pipelines over corpora.", helpURL = "http://gate.ac.uk/userguide/sec:developer:apps") public class SerialAnalyserController extends SerialController implements CorpusController, LanguageAnalyser, ControllerAwarePR { private static final long serialVersionUID = -7736138658476400031L; /** Debug flag */ private static final boolean DEBUG = false; /** * @return the document */ @Override public Document getDocument() { return document; } protected boolean runningAsSubPipeline = false; /** * @param document the document to set */ @Override @Optional @RunTime @CreoleParameter public void setDocument(Document document) { this.document = document; } @Override public gate.Corpus getCorpus() { return corpus; } @Override public void setCorpus(gate.Corpus corpus) { this.corpus = corpus; } @Override public void execute() throws ExecutionException { // Our assumption of if we run as a subpipeline of another corpus pipeline or // not is based on whether or not the document is null or not: if(document != null) { runningAsSubPipeline = true; } else { runningAsSubPipeline = false; } // inform ControllerAware PRs that execution has started, but only if we are not // running as a subpipeline of another corpus pipeline. if(!runningAsSubPipeline) { if(controllerCallbacksEnabled) { invokeControllerExecutionStarted(); } } Throwable thrown = null; try { if(Benchmark.isBenchmarkingEnabled()) { // write a start marker to the benchmark log for this // controller as a whole Benchmark.startPoint(getBenchmarkId()); } // do the real work this.executeImpl(); } catch(Throwable t) { thrown = t; } finally { if(thrown == null) { // successfully completed if(!runningAsSubPipeline) { if(controllerCallbacksEnabled) { invokeControllerExecutionFinished(); } } } else { // aborted if(!runningAsSubPipeline) { if(controllerCallbacksEnabled) { invokeControllerExecutionAborted(thrown); } } // rethrow the aborting exception or error if(thrown instanceof Error) { throw (Error)thrown; } else if(thrown instanceof RuntimeException) { throw (RuntimeException)thrown; } else if(thrown instanceof ExecutionException) { throw (ExecutionException)thrown; } else { // we have a checked exception that isn't one executeImpl can // throw. This shouldn't be possible, but just in case... throw new UndeclaredThrowableException(thrown); } } } } /** Run the Processing Resources in sequence. */ @Override protected void executeImpl() throws ExecutionException { interrupted = false; if(corpus == null) throw new ExecutionException("(SerialAnalyserController) \"" + getName() + "\":\n" + "The corpus supplied for execution was null!"); benchmarkFeatures.put(Benchmark.CORPUS_NAME_FEATURE, corpus.getName()); // reset the prTimeMap that keeps track of the time // taken by each PR to process the entire corpus super.resetPrTimeMap(); if(document == null){ //running as a top-level controller -> execute over all documents in //sequence // iterate through the documents in the corpus for(int i = 0; i < corpus.size(); i++) { String savedBenchmarkId = getBenchmarkId(); try { if(isInterrupted()) { throw new ExecutionInterruptedException("The execution of the " + getName() + " application has been abruptly interrupted!"); } boolean docWasLoaded = corpus.isDocumentLoaded(i); // record the time before loading the document long documentLoadingStartTime = Benchmark.startPoint(); Document doc = corpus.get(i); // include the document name in the benchmark ID for sub-events setBenchmarkId(Benchmark.createBenchmarkId("doc_" + doc.getName(), getBenchmarkId())); // report the document loading benchmarkFeatures.put(Benchmark.DOCUMENT_NAME_FEATURE, doc.getName()); Benchmark.checkPoint(documentLoadingStartTime, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_LOADED, getBenchmarkId()), this, benchmarkFeatures); // run the system over this document // set the doc and corpus for(int j = 0; j < prList.size(); j++) { ((LanguageAnalyser)prList.get(j)).setDocument(doc); ((LanguageAnalyser)prList.get(j)).setCorpus(corpus); } try { if(DEBUG) Out.pr("SerialAnalyserController processing doc=" + doc.getName() + "..."); super.executeImpl(); if(DEBUG) Out.prln("done."); } catch (Throwable throwable) { log.error("An error occurred processing document '" + doc.getName() + "'. This was document " + (i + 1) + " of " + corpus.size() + " in the '" + corpus.getName() + "' corpus. See the log for details"); if (throwable instanceof Error) { throw (Error)throwable; } else if (throwable instanceof RuntimeException) { throw (RuntimeException)throwable; } else if (throwable instanceof ExecutionException) { throw (ExecutionException)throwable; } else { // we have a checked exception that isn't one executeImpl can // throw. This shouldn't be possible, but just in case... throw new UndeclaredThrowableException(throwable); } } finally { // make sure we unset the doc and corpus even if we got an exception for(int j = 0; j < prList.size(); j++) { ((LanguageAnalyser)prList.get(j)).setDocument(null); ((LanguageAnalyser)prList.get(j)).setCorpus(null); } } if(!docWasLoaded) { long documentSavingStartTime = Benchmark.startPoint(); // trigger saving corpus.unloadDocument(doc); Benchmark.checkPoint(documentSavingStartTime, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_SAVED, getBenchmarkId()), this, benchmarkFeatures); // close the previoulsy unloaded Doc Factory.deleteResource(doc); } } finally { setBenchmarkId(savedBenchmarkId); } } }else{ //document is set, so we run as a contained controller (i.e. as a compound //Language Analyser // run the system over this document // set the doc and corpus for(int j = 0; j < prList.size(); j++) { ((LanguageAnalyser)prList.get(j)).setDocument(document); ((LanguageAnalyser)prList.get(j)).setCorpus(corpus); } try { if(DEBUG) Out.pr("SerialAnalyserController processing doc=" + document.getName() + "..."); super.executeImpl(); if(DEBUG) Out.prln("done."); } catch (Throwable throwable) { log.error("An error occurred processing document " + document.getName() + ". See the log for details"); if (throwable instanceof Error) { throw (Error)throwable; } else if (throwable instanceof RuntimeException) { throw (RuntimeException)throwable; } else if (throwable instanceof ExecutionException) { throw (ExecutionException)throwable; } else { // we have a checked exception that isn't one executeImpl can // throw. This shouldn't be possible, but just in case... throw new UndeclaredThrowableException(throwable); } } finally { // make sure we unset the doc and corpus even if we got an exception for(int j = 0; j < prList.size(); j++) { ((LanguageAnalyser)prList.get(j)).setDocument(null); ((LanguageAnalyser)prList.get(j)).setCorpus(null); } } }//document was not null // remove the features that we added benchmarkFeatures.remove(Benchmark.DOCUMENT_NAME_FEATURE); benchmarkFeatures.remove(Benchmark.CORPUS_NAME_FEATURE); } /** * Overidden from {@link SerialController} to only allow * {@link LanguageAnalyser}s as components. */ @Override public void add(ProcessingResource pr){ checkLanguageAnalyser(pr); super.add(pr); } /** * Overidden from {@link SerialController} to only allow * {@link LanguageAnalyser}s as components. */ @Override public void add(int index, ProcessingResource pr) { checkLanguageAnalyser(pr); super.add(index, pr); } /** * Throw an exception if the given processing resource is not * a LanguageAnalyser. */ protected void checkLanguageAnalyser(ProcessingResource pr) { if(!(pr instanceof LanguageAnalyser)) { throw new GateRuntimeException(getClass().getName() + " only accepts " + LanguageAnalyser.class.getName() + "s as components\n" + pr.getClass().getName() + " is not!"); } } /** * Sets the current document to the memeber PRs */ protected void setDocToPrs(Document doc) { Iterator prIter = getPRs().iterator(); while(prIter.hasNext()) { ProcessingResource pr = prIter.next(); // Not all PRs are LRs so we need to check carefully before // assuming they are if (pr instanceof LanguageAnalyser) ((LanguageAnalyser)pr).setDocument(doc); } } /** * Checks whether all the contained PRs have all the required runtime * parameters set. Ignores the corpus and document parameters as these will be * set at run time. * * @return a {@link List} of {@link ProcessingResource}s that have required * parameters with null values if they exist null * otherwise. * @throws ResourceInstantiationException * if problems occur while inspecting the parameters for one of the * resources. These will normally be introspection problems and are * usually caused by the lack of a parameter or of the read accessor * for a parameter. */ @Override public List getOffendingPocessingResources() throws ResourceInstantiationException { // take all the contained PRs List badPRs = new ArrayList(getPRs()); // remove the ones that no parameters problems Iterator prIter = getPRs().iterator(); while(prIter.hasNext()) { ProcessingResource pr = prIter.next(); ResourceData rData = Gate.getCreoleRegister().get(pr.getClass().getName()); // this is a list of lists List> parameters = rData.getParameterList().getRuntimeParameters(); // remove corpus and document List> newParameters = new ArrayList>(); Iterator> pDisjIter = parameters.iterator(); while(pDisjIter.hasNext()) { List aDisjunction = pDisjIter.next(); List newDisjunction = new ArrayList(aDisjunction); Iterator internalParIter = newDisjunction.iterator(); while(internalParIter.hasNext()) { Parameter parameter = internalParIter.next(); if(parameter.getName().equals("corpus") || parameter.getName().equals("document")) internalParIter.remove(); } if(!newDisjunction.isEmpty()) newParameters.add(newDisjunction); } if(AbstractResource.checkParameterValues(pr, newParameters)) { badPRs.remove(pr); } } return badPRs.isEmpty() ? null : badPRs; } /** * The corpus being processed by this controller. */ protected gate.Corpus corpus; /** * The document being processed. This is part of the {@link LanguageAnalyser} * interface, so this value is only used when the controller is used as a * member of another controller. */ protected Document document; /** * Overridden to also clean up the corpus value. */ @Override public void resourceUnloaded(CreoleEvent e) { super.resourceUnloaded(e); if(e.getResource() == corpus) { setCorpus(null); } } @Override public void controllerExecutionStarted(Controller c) throws ExecutionException { for(ControllerAwarePR pr : getControllerAwarePRs()) { if(pr instanceof LanguageAnalyser) { ((LanguageAnalyser)pr).setCorpus(corpus); } pr.controllerExecutionStarted(this); } } @Override public void controllerExecutionFinished(Controller c) throws ExecutionException { for(ControllerAwarePR pr : getControllerAwarePRs()) { if(pr instanceof LanguageAnalyser) { ((LanguageAnalyser)pr).setCorpus(corpus); } pr.controllerExecutionFinished(this); if(pr instanceof LanguageAnalyser) { ((LanguageAnalyser)pr).setCorpus(null); } } } @Override public void controllerExecutionAborted(Controller c, Throwable t) throws ExecutionException { for(ControllerAwarePR pr : getControllerAwarePRs()) { if(pr instanceof LanguageAnalyser) { ((LanguageAnalyser)pr).setCorpus(corpus); } pr.controllerExecutionAborted(c, t); if(pr instanceof LanguageAnalyser) { ((LanguageAnalyser)pr).setCorpus(null); } } } /** * Invoke the controllerExecutionStarted method on this controller and all nested PRs and controllers. * This method is intended to be used after if the automatic invocation of the controller * callback methods has been disabled with a call setControllerCallbackEnabled(false). Normally * the callback methods are automatically invoked at the start and end of execute(). * @throws ExecutionException */ @Override public void invokeControllerExecutionStarted() throws ExecutionException { for (ControllerAwarePR pr : getControllerAwarePRs()) { // If the pr is a nested corpus controller, make sure its corpus is set // This is necessary because the nested corpus controller will immediately // notify its own controller aware PRs and those should be able to know about // the corpus. if (pr instanceof LanguageAnalyser) { ((LanguageAnalyser) pr).setCorpus(getCorpus()); } pr.controllerExecutionStarted(this); } } /** * Invoke the controllerExecutionFinished method on this controller and all nested PRs and controllers. * This method is intended to be used after if the automatic invocation of the controller * callback methods has been disabled with a call setControllerCallbackEnabled(false). Normally * the callback methods are automatically invoked at the start and end of execute(). * @throws ExecutionException */ @Override public void invokeControllerExecutionFinished() throws ExecutionException { for (ControllerAwarePR pr : getControllerAwarePRs()) { if (pr instanceof LanguageAnalyser) { ((LanguageAnalyser) pr).setCorpus(getCorpus()); } pr.controllerExecutionFinished(this); if (pr instanceof LanguageAnalyser) { ((LanguageAnalyser) pr).setCorpus(null); } } } /** * Invoke the controllerExecutionAborted method on this controller and all nested PRs and controllers. * This method is intended to be used after if the automatic invocation of the controller * callback methods has been disabled with a call setControllerCallbackEnabled(false). Normally * the callback methods are automatically invoked at the start and end of execute(). * @param thrown The Throwable to pass on to the controller callback method. * @throws ExecutionException */ @Override public void invokeControllerExecutionAborted(Throwable thrown) throws ExecutionException { for (ControllerAwarePR pr : getControllerAwarePRs()) { if (pr instanceof LanguageAnalyser) { ((LanguageAnalyser) pr).setCorpus(getCorpus()); } pr.controllerExecutionAborted(this, thrown); if (pr instanceof LanguageAnalyser) { ((LanguageAnalyser) pr).setCorpus(null); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy