
cc.mallet.examples.TrainHMM Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
package cc.mallet.examples;
import java.io.*;
import java.util.*;
import java.util.regex.*;
import java.util.zip.*;
import cc.mallet.fst.*;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
/**
 * Example program that trains a MALLET {@link HMM} on a gzip-compressed
 * training file and then evaluates it on both the training data and a
 * gzip-compressed testing file.
 *
 * <p>All of the work happens in the constructor: both files are read through a
 * pipeline made of {@link SimpleTaggerSentence2TokenSequence} followed by
 * {@link TokenSequence2FeatureSequence}, an HMM is built from the training
 * instances and trained with {@link HMMTrainerByLikelihood}, and a
 * {@link PerClassAccuracyEvaluator} is run for each instance list.
 */
public class TrainHMM {

    /**
     * Matches a line that is empty or whitespace-only. It is handed to
     * {@link LineGroupIterator} as the boundary between line groups.
     */
    private static final Pattern BLANK_LINE = Pattern.compile("^\\s*$");

    /** Iteration count passed to {@code HMMTrainerByLikelihood.train}. */
    private static final int NUM_ITERATIONS = 10;

    /**
     * Loads the two data files, trains an HMM on the training data and runs the
     * evaluators on the training and testing data.
     *
     * @param trainingFilename path of the gzip-compressed training file
     * @param testingFilename  path of the gzip-compressed testing file
     * @throws IOException if either file cannot be opened, decompressed, or closed
     */
    public TrainHMM(String trainingFilename, String testingFilename) throws IOException {
        List<Pipe> pipes = new ArrayList<Pipe>();
        pipes.add(new SimpleTaggerSentence2TokenSequence());
        pipes.add(new TokenSequence2FeatureSequence());
        Pipe pipe = new SerialPipes(pipes);

        // Both lists share one pipe instance, exactly as the HMM below does.
        InstanceList trainingInstances = new InstanceList(pipe);
        InstanceList testingInstances = new InstanceList(pipe);
        addGzippedInstances(trainingInstances, trainingFilename);
        addGzippedInstances(testingInstances, testingFilename);

        HMM hmm = new HMM(pipe, null);
        hmm.addStatesForLabelsConnectedAsIn(trainingInstances);
        //hmm.addStatesForBiLabelsConnectedAsIn(trainingInstances);

        HMMTrainerByLikelihood trainer = new HMMTrainerByLikelihood(hmm);
        TransducerEvaluator trainingEvaluator =
                new PerClassAccuracyEvaluator(trainingInstances, "training");
        TransducerEvaluator testingEvaluator =
                new PerClassAccuracyEvaluator(testingInstances, "testing");

        trainer.train(trainingInstances, NUM_ITERATIONS);
        trainingEvaluator.evaluate(trainer);
        testingEvaluator.evaluate(trainer);
    }

    /**
     * Reads a gzip-compressed file, splits it into line groups at blank lines
     * and adds the resulting instances to {@code instances} through its pipe.
     *
     * <p>Every stream opened here is closed before returning, including when
     * decompression or reading fails part-way. Text is decoded with the
     * platform default charset, as the original code did.
     *
     * @param instances the instance list that receives the piped instances
     * @param filename  path of the gzip-compressed input file
     * @throws IOException if the file cannot be opened, decompressed, or closed
     */
    private static void addGzippedInstances(InstanceList instances, String filename)
            throws IOException {
        // The FileInputStream is its own resource so that it is still closed
        // when the GZIPInputStream constructor rejects the file header.
        try (InputStream file = new FileInputStream(filename);
             InputStream gzip = new GZIPInputStream(file);
             Reader reader = new BufferedReader(new InputStreamReader(gzip))) {
            // NOTE(review): relies on addThruPipe consuming the iterator fully
            // before returning, so closing the reader afterwards is safe.
            instances.addThruPipe(new LineGroupIterator(reader, BLANK_LINE, true));
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the training file and {@code args[1]} the
     *             testing file; any further arguments are ignored
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws Exception if loading, training, or evaluation fails
     */
    public static void main (String[] args) throws Exception {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: TrainHMM <training-file.gz> <testing-file.gz>");
        }
        new TrainHMM(args[0], args[1]);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy