gate.plugin.learningframework.LF_ApplyTopicModel Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of learningframework Show documentation
Show all versions of learningframework Show documentation
A GATE plugin that provides many different machine learning
algorithms for a wide range of NLP-related machine learning tasks like
text classification, tagging, or chunking.
/*
* Copyright (c) 2015-2016 The University Of Sheffield.
*
* This file is part of gateplugin-LearningFramework
* (see https://github.com/GateNLP/gateplugin-LearningFramework).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
package gate.plugin.learningframework;
import org.apache.log4j.Logger;
import gate.AnnotationSet;
import gate.Controller;
import gate.Document;
import gate.Factory;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.plugin.learningframework.engines.Engine;
import gate.plugin.learningframework.engines.EngineMBTopicsLDA;
import gate.util.GateRuntimeException;
import java.net.URL;
/**
 * Processing resource that applies a previously trained topic model to documents.
 * <p>
 * The engine is restored from {@code dataDirectory} when the controller starts.
 * For every document, the instance annotations and the token annotations are then
 * handed to the LDA engine together with the token feature name, the feature-name
 * prefix and the algorithm parameters.
 */
@CreoleResource(name = "LF_ApplyTopicModel",
        helpURL = "https://gatenlp.github.io/gateplugin-LearningFramework/LF_ApplyTopicModel",
        comment = "Apply a trained topic model to document annotations")
public class LF_ApplyTopicModel extends LearningFrameworkPRBase {

  static final Logger LOGGER = Logger.getLogger(LF_ApplyTopicModel.class.getCanonicalName());
  private static final long serialVersionUID = 5851732674711579672L;

  /** Location the saved model is loaded from. */
  protected URL dataDirectory;

  /**
   * Sets the directory the model is read from.
   *
   * @param output URL of the data directory
   */
  @RunTime
  @CreoleParameter(comment = "The directory where all data will be stored and read from")
  public void setDataDirectory(URL output) {
    dataDirectory = output;
  }

  /**
   * @return the URL of the data directory, or {@code null} if it has not been set
   */
  public URL getDataDirectory() {
    return this.dataDirectory;
  }

  /** Annotation type of the tokens; {@code "Token"} is used when null or empty. */
  private String tokenAnnotationType = "Token";

  /**
   * Sets the annotation type that represents the words/tokens.
   *
   * @param val annotation type name
   */
  @RunTime
  @CreoleParameter(comment = "The annotation type representing the words/tokens to use",
          defaultValue = "Token")
  public void setTokenAnnotationType(String val) {
    tokenAnnotationType = val;
  }

  /**
   * @return the configured token annotation type
   */
  public String getTokenAnnotationType() {
    return tokenAnnotationType;
  }

  /** Name of the token feature that holds the token string. */
  private String tokenFeature = "string";

  /**
   * Sets the token feature holding the token string.
   *
   * @param val feature name; passed unchanged to the engine
   */
  @RunTime
  @Optional
  @CreoleParameter(comment = "The feature containing the token string to use, if empty, use document content",
          defaultValue = "string")
  public void setTokenFeature(String val) {
    tokenFeature = val;
  }

  /**
   * @return the configured token feature name
   */
  public String getTokenFeature() {
    return tokenFeature;
  }

  /** Prefix of the feature names written to the instance annotations. */
  private String featurePrefix;

  /**
   * Sets the prefix used for the feature names written to the instance annotations.
   *
   * @param val feature-name prefix
   */
  @RunTime
  @Optional
  @CreoleParameter(comment="Prefix of the feature names written to the instance annotations",
          defaultValue="LDA_")
  public void setFeaturePrefix(String val) {
    featurePrefix = val;
  }

  /**
   * @return the configured feature-name prefix
   */
  public String getFeaturePrefix() {
    return featurePrefix;
  }

  ////////////////////////////////////////////////////////////////////////////

  /** Engine restored in {@link #controllerStarted(Controller)}; not serialized. */
  private transient Engine engine;

  /** Directory the current engine is loaded from; kept equal to {@code dataDirectory}. */
  private URL savedModelDirectoryURL;

  /**
   * Applies the loaded topic model to one document.
   *
   * @param doc the document to process
   * @throws GateRuntimeException if execution was interrupted, or if no LDA topic
   *         model engine is available
   */
  @Override
  public void process(Document doc) {
    if (isInterrupted()) {
      interrupted = false;
      throw new GateRuntimeException("Execution was requested to be interrupted");
    }

    // extract the required annotation sets
    AnnotationSet inputAS = doc.getAnnotations(getInputASName());

    String tokenType = getTokenAnnotationType();
    if (tokenType == null || tokenType.isEmpty()) {
      tokenType = "Token";
    }
    AnnotationSet tokenAS = inputAS.get(tokenType);

    AnnotationSet instanceAS;
    if (getInstanceType() != null && !getInstanceType().isEmpty()) {
      instanceAS = inputAS.get(getInstanceType());
    } else {
      // No instance type configured: use "Document" annotations from the default set,
      // creating one that spans the whole document if none exists yet. The lookup and
      // the creation must target the same set, otherwise the freshly created
      // annotation would not be found when a non-default input set is configured.
      AnnotationSet defaultAS = doc.getAnnotations();
      instanceAS = defaultAS.get("Document");
      if (instanceAS.isEmpty()) {
        gate.Utils.addAnn(defaultAS, 0, doc.getContent().size(), "Document", Factory.newFeatureMap());
        instanceAS = defaultAS.get("Document");
      }
    }

    // Fail with a descriptive message instead of a bare ClassCastException or
    // NullPointerException when the engine is missing or of a different kind.
    if (!(engine instanceof EngineMBTopicsLDA)) {
      throw new GateRuntimeException(
              "Loaded engine is not an LDA topic model engine: " + engine);
    }
    EngineMBTopicsLDA engineLDA = (EngineMBTopicsLDA) engine;
    engineLDA.applyTopicModel(
            instanceAS, tokenAS,
            getTokenFeature(),
            getFeaturePrefix(),
            getAlgorithmParameters());
  }

  /**
   * Loads the engine from the data directory before the controller starts processing.
   *
   * @param controller the controller that is starting
   * @throws GateRuntimeException if {@code dataDirectory} is not set, or if no engine
   *         or no model could be loaded
   */
  @Override
  public void controllerStarted(Controller controller) {
    if (dataDirectory == null) {
      throw new GateRuntimeException("Parameter dataDirectory not set!");
    }
    // The model is always (re)loaded from the currently configured data directory.
    savedModelDirectoryURL = dataDirectory;

    // Restore the Engine
    engine = Engine.load(savedModelDirectoryURL, getAlgorithmParameters());
    System.out.println("LF-Info: model loaded is now " + engine);
    if (engine == null || engine.getModel() == null) {
      throw new GateRuntimeException("Do not have a model, something went wrong.");
    }
    System.out.println("LearningFramework: Applying model "
            + engine.getModel().getClass() + " ...");
  }
}