gate.plugins.ANNIE.creole.xml Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of gate-extraction Show documentation
A GATE-based component that can process text units to extract information using GATE's tools (such as grammars, gazetteers, tokenisers or POS taggers). This project contains two versions: a simple component and a web service one.
There is a newer version: 2.0
Show newest version
<?xml version="1.0"?>
<CREOLE-DIRECTORY>

	<CREOLE>

		<!-- Unicode tokeniser: declares gate.creole.tokeniser.SimpleTokeniser and its parameters -->
		<RESOURCE>
			<NAME>GATE Unicode Tokeniser</NAME>
			<CLASS>gate.creole.tokeniser.SimpleTokeniser</CLASS>
			<COMMENT>A customisable Unicode tokeniser.</COMMENT>
			<HELPURL>http://gate.ac.uk/userguide/sec:tokeniser</HELPURL>
			<!-- Parameters flagged RUNTIME="true" -->
			<PARAMETER
				NAME="document"
				COMMENT="The document to be tokenised"
				RUNTIME="true">gate.Document</PARAMETER>
			<PARAMETER
				NAME="annotationSetName"
				COMMENT="The annotation set to be used for the generated annotations"
				RUNTIME="true"
				OPTIONAL="true">java.lang.String</PARAMETER>
			<!-- Parameters without a RUNTIME flag; both carry a default value -->
			<PARAMETER
				NAME="rulesURL"
				COMMENT="The URL to the rules file"
				DEFAULT="resources/tokeniser/DefaultTokeniser.rules"
				SUFFIXES="rules">java.net.URL</PARAMETER>
			<PARAMETER
				NAME="encoding"
				COMMENT="The encoding used for reading the definitions"
				DEFAULT="UTF-8">java.lang.String</PARAMETER>
			<ICON>tokeniser</ICON>
		</RESOURCE>

		<!-- English tokeniser: declares gate.creole.tokeniser.DefaultTokeniser and its parameters -->
		<RESOURCE>
			<NAME>ANNIE English Tokeniser</NAME>
			<CLASS>gate.creole.tokeniser.DefaultTokeniser</CLASS>
			<COMMENT>A customisable English tokeniser.</COMMENT>
			<HELPURL>http://gate.ac.uk/userguide/sec:tokeniser</HELPURL>
			<!-- Parameters flagged RUNTIME="true" -->
			<PARAMETER
				NAME="document"
				COMMENT="The document to be tokenised"
				RUNTIME="true">gate.Document</PARAMETER>
			<PARAMETER
				NAME="annotationSetName"
				COMMENT="The annotation set to be used for the generated annotations"
				RUNTIME="true"
				OPTIONAL="true">java.lang.String</PARAMETER>
			<!-- Parameters without a RUNTIME flag: tokeniser rules, post-processing grammar and their encoding -->
			<PARAMETER
				NAME="tokeniserRulesURL"
				COMMENT="The URL to the rules file"
				DEFAULT="resources/tokeniser/DefaultTokeniser.rules"
				SUFFIXES="rules">java.net.URL</PARAMETER>
			<PARAMETER
				NAME="transducerGrammarURL"
				COMMENT="The URL to the postprocessing transducer"
				DEFAULT="resources/tokeniser/postprocess.jape"
				SUFFIXES="jape">java.net.URL</PARAMETER>
			<PARAMETER
				NAME="encoding"
				COMMENT="The encoding used for reading the definitions"
				DEFAULT="UTF-8">java.lang.String</PARAMETER>
			<ICON>tokeniser</ICON>
		</RESOURCE>

		<!-- ANNIE Gazetteer: declares gate.creole.gazetteer.DefaultGazetteer (a list lookup component) and its parameters -->
		<RESOURCE>
			<NAME>ANNIE Gazetteer</NAME>
			<CLASS>gate.creole.gazetteer.DefaultGazetteer</CLASS>
			<COMMENT>A list lookup component.</COMMENT>
			<HELPURL>http://gate.ac.uk/userguide/sec:gazetteer</HELPURL>
			<PARAMETER NAME="document" RUNTIME="true" COMMENT="The document to be processed">gate.Document</PARAMETER>
			<!-- Optional and without a default: when omitted, gazetteer entries carry no extra features (see the COMMENT text) -->
			<PARAMETER NAME="gazetteerFeatureSeparator"
				COMMENT="The character used to separate features for entries in gazetteer lists. Accepts strings like &quot;\t&quot; and will unescape it to the relevant character. If not specified, this gazetteer does not support extra features."
				OPTIONAL="true">java.lang.String</PARAMETER>
			<PARAMETER NAME="listsURL" DEFAULT="resources/gazetteer/lists.def" COMMENT="The URL to the file with list of lists" SUFFIXES="def">java.net.URL</PARAMETER>
			<PARAMETER NAME="caseSensitive" DEFAULT="true" COMMENT="Should this gazetteer differentiate on case?">java.lang.Boolean</PARAMETER>
			<PARAMETER NAME="encoding" DEFAULT="UTF-8" COMMENT="The encoding used for reading the definitions">java.lang.String</PARAMETER>
			<PARAMETER NAME="annotationSetName" RUNTIME="true" COMMENT="The annotation set to be used for the generated annotations" OPTIONAL="true">java.lang.String</PARAMETER>
			<!-- Matching behaviour switches; both are flagged RUNTIME="true" and default to true -->
			<PARAMETER NAME="wholeWordsOnly" DEFAULT="true" COMMENT="Should this gazetteer only match whole words?" RUNTIME="true">java.lang.Boolean</PARAMETER>
			<PARAMETER NAME="longestMatchOnly" DEFAULT="true" COMMENT="Should this gazetteer only match the longest string starting from any offset?" RUNTIME="true">java.lang.Boolean</PARAMETER>
			<ICON>gazetteer</ICON>
		</RESOURCE>

		<!-- Hash Gazetteer (ex Natural): declares com.ontotext.gate.gazetteer.HashGazetteer and its parameters -->
		<RESOURCE>
			<NAME>Hash Gazetteer</NAME>
			<CLASS>com.ontotext.gate.gazetteer.HashGazetteer</CLASS>
			<!-- Kept on a single line so that no line break or indentation ends up inside the description text -->
			<COMMENT>A list lookup component implemented by OntoText Lab. The licence information is also available in licence.ontotext.html in the lib folder of GATE</COMMENT>
			<HELPURL>http://www.ontotext.com/downloads/index.html#gazetteer</HELPURL>
			<PARAMETER NAME="document" RUNTIME="true" COMMENT="The document to be processed">gate.Document</PARAMETER>
			<PARAMETER NAME="annotationSetName" RUNTIME="true" COMMENT="The annotation set to be used for the generated annotations" OPTIONAL="true">java.lang.String</PARAMETER>
			<PARAMETER NAME="listsURL" DEFAULT="resources/gazetteer/lists.def" COMMENT="The URL to the file with list of lists" SUFFIXES="def">java.net.URL</PARAMETER>
			<PARAMETER DEFAULT="UTF-8" COMMENT="The encoding used for reading the definitions" NAME="encoding">java.lang.String</PARAMETER>
			<PARAMETER DEFAULT="true" NAME="caseSensitive" COMMENT="Should this gazetteer differentiate on case? Currently the Gazetteer works only in case sensitive mode.">java.lang.Boolean</PARAMETER>
			<ICON>gazetteer</ICON>
		</RESOURCE>

		<!-- Jape Transducer: declares gate.creole.Transducer, a module for executing Jape grammars, and its parameters -->
		<RESOURCE>
			<NAME>Jape Transducer</NAME>
			<CLASS>gate.creole.Transducer</CLASS>
			<COMMENT>A module for executing Jape grammars.</COMMENT>
			<HELPURL>http://gate.ac.uk/userguide/chap:jape</HELPURL>
			<PARAMETER NAME="document" RUNTIME="true" COMMENT="The document to be processed">gate.Document</PARAMETER>
			<PARAMETER NAME="inputASName" RUNTIME="true" COMMENT="The annotation set to be used as input for the transducer" OPTIONAL="true">java.lang.String</PARAMETER>
			<PARAMETER NAME="outputASName" RUNTIME="true" COMMENT="The annotation set to be used as output for the transducer" OPTIONAL="true">java.lang.String</PARAMETER>
			<!-- The two grammar sources are wrapped in OR; presumably only one of them is meant to be supplied (confirm against the GATE CREOLE documentation) -->
			<OR>
				<PARAMETER NAME="grammarURL" COMMENT="The URL to the grammar file" SUFFIXES="jape">java.net.URL</PARAMETER>
				<PARAMETER NAME="binaryGrammarURL" COMMENT="The URL to the binary grammar file" SUFFIXES="jape">java.net.URL</PARAMETER>
			</OR>
			<PARAMETER NAME="encoding" DEFAULT="UTF-8" COMMENT="The encoding used for reading the grammar">java.lang.String</PARAMETER>
			<PARAMETER NAME="ontology" COMMENT="The ontology to be used by this transducer" OPTIONAL="true" RUNTIME="true">gate.creole.ontology.Ontology</PARAMETER>
			<!-- List-valued parameters: ITEM_CLASS_NAME gives the element type of the java.util.ArrayList -->
			<PARAMETER NAME="operators" RUNTIME="false" OPTIONAL="true" COMMENT="Class names that implement gate.jape.constraint.ConstraintPredicate." ITEM_CLASS_NAME="java.lang.String">java.util.ArrayList</PARAMETER>
			<!-- Closing tag kept on the same line so the type name carries no trailing whitespace -->
			<PARAMETER NAME="annotationAccessors" RUNTIME="false" OPTIONAL="true" COMMENT="Class names that implement gate.jape.constraint.AnnotationAccessor." ITEM_CLASS_NAME="java.lang.String">java.util.ArrayList</PARAMETER>
			<ICON>jape</ICON>
		</RESOURCE>

		<!-- ANNIE NE Transducer: declares gate.creole.ANNIETransducer (the ANNIE named entity grammar) and its parameters -->
		<RESOURCE>
			<NAME>ANNIE NE Transducer</NAME>
			<CLASS>gate.creole.ANNIETransducer</CLASS>
			<COMMENT>ANNIE named entity grammar.</COMMENT>
			<HELPURL>http://gate.ac.uk/userguide/sec:semantic-tagger</HELPURL>
			<PARAMETER NAME="document" RUNTIME="true" COMMENT="The document to be processed">gate.Document</PARAMETER>
			<PARAMETER NAME="inputASName" RUNTIME="true" COMMENT="The annotation set to be used as input for the transducer" OPTIONAL="true">java.lang.String</PARAMETER>
			<PARAMETER NAME="outputASName" RUNTIME="true" COMMENT="The annotation set to be used as output for the transducer" OPTIONAL="true">java.lang.String</PARAMETER>
			<!-- The grammar location defaults to resources/NE/main.jape -->
			<PARAMETER NAME="grammarURL" COMMENT="The URL to the grammar file" DEFAULT="resources/NE/main.jape" SUFFIXES="jape">java.net.URL</PARAMETER>
			<PARAMETER NAME="encoding" DEFAULT="UTF-8" COMMENT="The encoding used for reading the grammar">java.lang.String</PARAMETER>
			<!-- List-valued parameters: ITEM_CLASS_NAME gives the element type of the java.util.ArrayList -->
			<PARAMETER NAME="operators" RUNTIME="false" OPTIONAL="true" COMMENT="Class names that implement gate.jape.constraint.ConstraintPredicate." ITEM_CLASS_NAME="java.lang.String">java.util.ArrayList</PARAMETER>
			<!-- Closing tag kept on the same line so the type name carries no trailing whitespace -->
			<PARAMETER NAME="annotationAccessors" RUNTIME="false" OPTIONAL="true" COMMENT="Class names that implement gate.jape.constraint.AnnotationAccessor." ITEM_CLASS_NAME="java.lang.String">java.util.ArrayList</PARAMETER>
			<ICON>ne-transducer</ICON>
		</RESOURCE>

		<!-- ANNIE Sentence Splitter: declares gate.creole.splitter.SentenceSplitter and its parameters -->
		<RESOURCE>
			<NAME>ANNIE Sentence Splitter</NAME>
			<COMMENT>ANNIE sentence splitter.</COMMENT>
			<HELPURL>http://gate.ac.uk/userguide/sec:splitter</HELPURL>
			<CLASS>gate.creole.splitter.SentenceSplitter</CLASS>
			<PARAMETER COMMENT="The document to be processed" NAME="document" RUNTIME="true">gate.Document</PARAMETER>
			<PARAMETER NAME="inputASName" COMMENT="The annotation set to be used as input that must contain 'Token' annotations" RUNTIME="true" OPTIONAL="true">java.lang.String</PARAMETER>
			<PARAMETER NAME="outputASName" COMMENT="The annotation set to be used as output for 'Sentence' and 'Split' annotations" RUNTIME="true" OPTIONAL="true">java.lang.String</PARAMETER>
			<PARAMETER NAME="encoding" COMMENT="The encoding used for reading the definition files" DEFAULT="UTF-8">java.lang.String</PARAMETER>
			<!-- Closing tag kept on the same line so the type name carries no trailing whitespace -->
			<PARAMETER NAME="gazetteerListsURL" DEFAULT="resources/sentenceSplitter/gazetteer/lists.def" COMMENT="The URL to the custom list lookup definition file" OPTIONAL="true" SUFFIXES="def">java.net.URL</PARAMETER>
			<PARAMETER NAME="transducerURL" DEFAULT="resources/sentenceSplitter/grammar/main.jape" COMMENT="The URL to the custom Jape grammar file" OPTIONAL="true" SUFFIXES="jape">java.net.URL</PARAMETER>
			<ICON>sentence-splitter</ICON>
		</RESOURCE>


		<!-- ANNIE POS Tagger (HepTag, Mark Hepple's POS tagger): declares gate.creole.POSTagger and its parameters -->
		<RESOURCE>
			<NAME>ANNIE POS Tagger</NAME>
			<COMMENT>Mark Hepple's Brill-style POS tagger.</COMMENT>
			<HELPURL>http://gate.ac.uk/userguide/sec:tagger</HELPURL>
			<CLASS>gate.creole.POSTagger</CLASS>
			<PARAMETER NAME="document" COMMENT="The document to be processed" RUNTIME="true">gate.Document</PARAMETER>
			<PARAMETER NAME="inputASName" COMMENT="The annotation set to be used as input that must contain 'Token' and 'Sentence' annotations" RUNTIME="true" OPTIONAL="true">java.lang.String</PARAMETER>
			<PARAMETER NAME="outputASName" COMMENT="The annotation set to be used as output for POS annotations" RUNTIME="true" OPTIONAL="true">java.lang.String</PARAMETER>
			<!-- Unlike the other resources in this file, encoding has no DEFAULT here and is marked optional -->
			<PARAMETER NAME="encoding" COMMENT="The encoding used for reading rules and lexicons" OPTIONAL="true">java.lang.String</PARAMETER>
			<PARAMETER NAME="lexiconURL" DEFAULT="resources/heptag/lexicon" COMMENT="The URL to the lexicon file" OPTIONAL="true">java.net.URL</PARAMETER>
			<PARAMETER NAME="rulesURL" DEFAULT="resources/heptag/ruleset" COMMENT="The URL to the ruleset file" OPTIONAL="true">java.net.URL</PARAMETER>
			<!-- Annotation type names: required (OPTIONAL="false"), flagged RUNTIME="true", each with a default -->
			<PARAMETER NAME="baseTokenAnnotationType" COMMENT="The name of the base 'Token' annotation type" RUNTIME="true" OPTIONAL="false" DEFAULT="Token">java.lang.String</PARAMETER>
			<PARAMETER NAME="baseSentenceAnnotationType" COMMENT="The name of the base 'Sentence' annotation type" RUNTIME="true" OPTIONAL="false" DEFAULT="Sentence">java.lang.String</PARAMETER>
			<!-- Closing tag kept on the same line so the type name carries no trailing whitespace -->
			<PARAMETER NAME="outputAnnotationType" COMMENT="The name of the annotation type where the new features should be added" RUNTIME="true" OPTIONAL="false" DEFAULT="Token">java.lang.String</PARAMETER>
			<ICON>pos-tagger</ICON>
		</RESOURCE>

	</CREOLE>
</CREOLE-DIRECTORY>