
cc.mallet.topics.JSONTopicReports Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
package cc.mallet.topics;
import java.io.*;
import java.util.*;
import cc.mallet.types.*;
/**
 * Topic report writer that renders a topic model summary as JSON.
 *
 * <p>Only {@link #printSummary(PrintWriter, int)} produces output in this class;
 * every other {@code PrintWriter}-based report method has an empty body and
 * writes nothing.
 */
public class JSONTopicReports extends AbstractTopicReports implements TopicReports {

    /**
     * Creates a JSON report writer for the given model.
     *
     * @param model the topic model to report on; handed to the superclass constructor
     */
    public JSONTopicReports (ParallelTopicModel model) {
        super(model);
    }

    /** Empty in this class: writes nothing to {@code out}. */
    public void printSamplingState(PrintWriter out) throws IOException { }

    /** Empty in this class: writes nothing to {@code out}. */
    public void printTopicDocuments(PrintWriter out, int max) throws IOException { }

    /** Empty in this class: writes nothing to {@code out}. */
    public void printDocumentTopics(PrintWriter out, double threshold, int max) throws IOException { }

    /** Empty in this class: writes nothing to {@code out}. */
    public void printDenseDocumentTopics(PrintWriter out) throws IOException { }

    /** Empty in this class: writes nothing to {@code out}. */
    public void printTopicWordWeights(PrintWriter out) throws IOException { }

    /** Empty in this class: writes nothing to {@code out}. */
    public void printTypeTopicCounts(PrintWriter out) throws IOException { }

    /** Empty in this class: writes nothing to {@code out}. */
    public void printTopicPhrases(PrintWriter out, int numWords) throws IOException { }

    /**
     * Writes a one-line JSON array with one object per topic.
     *
     * <p>Each object has the form
     * {@code {"topic":<index>, "smoothing":<alpha>, "words":{"<word>": <weight>, ...}}},
     * where the words are taken in the iteration order of the model's sorted
     * word set for that topic and at most {@code numWords} of them are emitted.
     *
     * @param out      destination; receives the JSON text followed by a line break
     * @param numWords maximum number of words per topic; zero or a negative value
     *                 yields an empty {@code "words"} object
     * @throws IOException declared by the signature; the visible body does not throw it
     */
    public void printSummary(PrintWriter out, int numWords) throws IOException {
        // Locale.ROOT pins '.' as the decimal separator for %f; the default
        // locale may use ',' which is not a valid JSON number.
        Formatter buffer = new Formatter(Locale.ROOT);

        ArrayList<TreeSet<IDSorter>> topicSortedWords = model.getSortedWords();

        buffer.format("[");

        // Print results for each topic
        for (int topic = 0; topic < model.numTopics; topic++) {
            // Separator goes before every element except the first.
            if (topic > 0) {
                buffer.format(",");
            }

            buffer.format("{\"topic\":%d, \"smoothing\":%f, \"words\":{", topic, model.alpha[topic]);

            Iterator<IDSorter> iterator = topicSortedWords.get(topic).iterator();
            int word = 0;
            while (iterator.hasNext() && word < numWords) {
                IDSorter info = iterator.next();
                if (word > 0) {
                    buffer.format(",");
                }
                // The word becomes a JSON object key, so it must be escaped.
                String key = escapeJson(String.valueOf(model.alphabet.lookupObject(info.getID())));
                buffer.format("\"%s\": %f", key, info.getWeight());
                word++;
            }

            buffer.format("}}");
        }

        buffer.format("]");
        out.println(buffer);
    }

    /**
     * Escapes a string for use inside a double-quoted JSON string literal.
     *
     * <p>Quotes and backslashes are backslash-escaped, common control characters
     * use their short escapes, and any other character below 0x20 is written as
     * a four-digit hexadecimal Unicode escape.
     *
     * @param text the raw text
     * @return the escaped text, without surrounding quotes
     */
    private static String escapeJson(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                case '\b':
                    escaped.append("\\b");
                    break;
                case '\f':
                    escaped.append("\\f");
                    break;
                default:
                    if (c < 0x20) {
                        escaped.append(String.format(Locale.ROOT, "\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
            }
        }
        return escaped.toString();
    }

    /**
     * Command-line demo: loads an instance list, trains a topic model and writes
     * its summary to {@code summary.json} in the working directory.
     *
     * @param args {@code args[0]} is the path of the serialized instance list to load
     * @throws Exception if loading, training or writing the report fails
     */
    public static void main (String[] args) throws Exception {
        InstanceList instances = InstanceList.load(new File(args[0]));

        // 50 is used as the topic count in the summary loop via model.numTopics;
        // presumably 5.0 and 0.01 are the alpha-sum and beta priors — confirm
        // against the ParallelTopicModel constructor.
        ParallelTopicModel model = new ParallelTopicModel(50, 5.0, 0.01);
        model.addInstances(instances);
        model.setNumIterations(100);
        model.estimate();

        TopicReports reports = new JSONTopicReports(model);
        // NOTE(review): the File-based printSummary overload is not declared in
        // this class; it is presumably inherited — verify in AbstractTopicReports.
        reports.printSummary(new File("summary.json"), 20);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy