gate.plugin.learningframework.LF_GenFeatures_Misc Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of learningframework Show documentation
Show all versions of learningframework Show documentation
A GATE plugin that provides many different machine learning
algorithms for a wide range of NLP-related machine learning tasks like
text classification, tagging, or chunking.
/*
* Copyright (c) 2015-2016 The University Of Sheffield.
*
* This file is part of gateplugin-LearningFramework
* (see https://github.com/GateNLP/gateplugin-LearningFramework).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software. If not, see .
*/
package gate.plugin.learningframework;
import gate.Annotation;
import gate.AnnotationSet;
import org.apache.log4j.Logger;
import gate.Controller;
import gate.Document;
import gate.FeatureMap;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.GateRuntimeException;
/**
*
*/
@CreoleResource(
name = "LF_GenFeatures_Misc",
helpURL = "https://gatenlp.github.io/gateplugin-LearningFramework/LF_GenFeatures_Misc",
comment = "Generate various kinds of features")
public class LF_GenFeatures_Misc extends AbstractDocumentProcessor {
private static final long serialVersionUID = -4854556257508853640L;
private final Logger logger = Logger.getLogger(LF_GenFeatures_Misc.class.getCanonicalName());
protected Boolean genWordShape = false;
@RunTime
@CreoleParameter(
comment = "Generate word shape",
defaultValue = "false")
public void setGenWordShape(Boolean val) {
genWordShape = val;
}
public Boolean getGenWordShape() {
return genWordShape;
}
protected Boolean genWordShapeShort = true;
@RunTime
@CreoleParameter(
comment = "Generate short word shape",
defaultValue = "true")
public void setGenWordShapeShort(Boolean val) {
genWordShapeShort = val;
}
public Boolean getGenWordShapeShort() {
return genWordShapeShort;
}
protected String inputASName;
@RunTime
@Optional
@CreoleParameter
public void setInputASName(String iasn) {
this.inputASName = iasn;
}
public String getInputASName() {
return this.inputASName;
}
protected String instanceType;
@RunTime
@CreoleParameter(defaultValue = "Token", comment = "The annotation type to "
+ "be treated as instance.")
public void setInstanceType(String inst) {
this.instanceType = inst;
}
public String getInstanceType() {
return this.instanceType;
}
protected String stringFeature = "";
@RunTime
@Optional
@CreoleParameter(defaultValue = "",
comment = "Where to take the word string from, empty means underlying document string"
)
public void setStringFeature(String val) {
stringFeature = val;
}
public String getStringFeature() {
return stringFeature;
}
@Override
public void process(Document doc) {
if(isInterrupted()) {
interrupted = false;
throw new GateRuntimeException("Execution was requested to be interrupted");
}
// extract the required annotation sets,
AnnotationSet inputAS = doc.getAnnotations(getInputASName());
AnnotationSet instanceAS = inputAS.get(getInstanceType());
for(Annotation ann : instanceAS) {
FeatureMap fm = ann.getFeatures();
String string;
if(getStringFeature().isEmpty()) {
string = gate.Utils.stringFor(document, ann);
} else {
string = (String)fm.get(getStringFeature());
}
if(string == null) {
string = "";
}
if(getGenWordShape()) {
char[] arr = string.toCharArray();
char[] out = new char[arr.length];
for(int i = 0; i