
cc.mallet.topics.tui.InferTopics Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
package cc.mallet.topics.tui;
import cc.mallet.util.*;
import cc.mallet.types.*;
import cc.mallet.topics.*;
import java.io.*;
public class InferTopics {
static CommandOption.String inferencerFilename = new CommandOption.String
(InferTopics.class, "inferencer", "FILENAME", true, null,
"A serialized topic inferencer from a trained topic model.\n" +
"By default this is null, indicating that no file will be read.", null);
static CommandOption.String inputFile = new CommandOption.String
(InferTopics.class, "input", "FILENAME", true, null,
"The filename from which to read the list of instances\n" +
"for which topics should be inferred. Use - for stdin. " +
"The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);
static CommandOption.String docTopicsFile = new CommandOption.String
(InferTopics.class, "output-doc-topics", "FILENAME", true, null,
"The filename in which to write the inferred topic\n" +
"proportions per document. " +
"By default this is null, indicating that no file will be written.", null);
static CommandOption.Double docTopicsThreshold = new CommandOption.Double
(InferTopics.class, "doc-topics-threshold", "DECIMAL", true, 0.0,
"When writing topic proportions per document with --output-doc-topics, " +
"do not print topics with proportions less than this threshold value.", null);
static CommandOption.Integer docTopicsMax = new CommandOption.Integer
(InferTopics.class, "doc-topics-max", "INTEGER", true, -1,
"When writing topic proportions per document with --output-doc-topics, " +
"do not print more than INTEGER number of topics. "+
"A negative value indicates that all topics should be printed.", null);
static CommandOption.Integer numIterations = new CommandOption.Integer
(InferTopics.class, "num-iterations", "INTEGER", true, 100,
"The number of iterations of Gibbs sampling.", null);
static CommandOption.Integer sampleInterval = new CommandOption.Integer
(InferTopics.class, "sample-interval", "INTEGER", true, 10,
"The number of iterations between saved samples.", null);
static CommandOption.Integer burnInIterations = new CommandOption.Integer
(InferTopics.class, "burn-in", "INTEGER", true, 10,
"The number of iterations before the first sample is saved.", null);
static CommandOption.Integer randomSeed = new CommandOption.Integer
(InferTopics.class, "random-seed", "INTEGER", true, 0,
"The random seed for the Gibbs sampler. Default is 0, which will use the clock.", null);
public static void main (String[] args) {
// Process the command-line options
CommandOption.setSummary (InferTopics.class,
"Use an existing topic model to infer topic distributions for new documents");
CommandOption.process (InferTopics.class, args);
if (inferencerFilename.value == null) {
System.err.println("You must specify a serialized topic inferencer. Use --help to list options.");
System.exit(0);
}
if (inputFile.value == null) {
System.err.println("You must specify a serialized instance list. Use --help to list options.");
System.exit(0);
}
try {
TopicInferencer inferencer =
TopicInferencer.read(new File(inferencerFilename.value));
InstanceList instances = InstanceList.load (new File(inputFile.value));
if (randomSeed.value != 0) {
inferencer.setRandomSeed(randomSeed.value);
}
inferencer.writeInferredDistributions(instances, new File(docTopicsFile.value),
numIterations.value, sampleInterval.value,
burnInIterations.value,
docTopicsThreshold.value, docTopicsMax.value);
} catch (Exception e) {
e.printStackTrace();
System.err.println(e.getMessage());
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy