All downloads are free. The search and download functionality uses the official Maven repository.

cc.mallet.examples.TrainHMM Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
package cc.mallet.examples;

import java.io.*;
import java.util.*;
import java.util.regex.*;
import java.util.zip.*;

import cc.mallet.fst.*;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;

/**
 * Example program that trains a hidden Markov model on one labelled data set
 * and reports per-class accuracy on both the training set and a separate
 * testing set.
 *
 * <p>Both input files are read as gzip-compressed text in which instances are
 * separated by blank (or whitespace-only) lines. All of the work happens in
 * the constructor; {@link #main(String[])} simply forwards its two arguments.
 */
public class TrainHMM {

	/** Matches the blank or whitespace-only lines that delimit instances in the input files. */
	private static final Pattern BLANK_LINE = Pattern.compile("^\\s*$");

	/** Iteration count handed to {@code HMMTrainerByLikelihood.train}. */
	private static final int TRAINING_ITERATIONS = 10;

	/**
	 * Loads the two data sets, trains an HMM on the training set and evaluates
	 * the trained model on both sets.
	 *
	 * @param trainingFilename path of the gzip-compressed training file
	 * @param testingFilename  path of the gzip-compressed testing file
	 * @throws IOException if either file cannot be opened, is not valid gzip
	 *                     data, or cannot be closed
	 */
	public TrainHMM(String trainingFilename, String testingFilename) throws IOException {

		ArrayList<Pipe> pipes = new ArrayList<Pipe>();
		pipes.add(new SimpleTaggerSentence2TokenSequence());
		pipes.add(new TokenSequence2FeatureSequence());

		// Both instance lists share the same pipe instance, exactly as before;
		// the training data is pushed through it first.
		Pipe pipe = new SerialPipes(pipes);

		InstanceList trainingInstances = loadInstances(pipe, trainingFilename);
		InstanceList testingInstances = loadInstances(pipe, testingFilename);

		HMM hmm = new HMM(pipe, null);
		hmm.addStatesForLabelsConnectedAsIn(trainingInstances);
		//hmm.addStatesForBiLabelsConnectedAsIn(trainingInstances);

		HMMTrainerByLikelihood trainer = new HMMTrainerByLikelihood(hmm);
		TransducerEvaluator trainingEvaluator =
			new PerClassAccuracyEvaluator(trainingInstances, "training");
		TransducerEvaluator testingEvaluator =
			new PerClassAccuracyEvaluator(testingInstances, "testing");
		trainer.train(trainingInstances, TRAINING_ITERATIONS);

		trainingEvaluator.evaluate(trainer);
		testingEvaluator.evaluate(trainer);
	}

	/**
	 * Reads one gzip-compressed, blank-line-delimited file through {@code pipe}
	 * into a new instance list, closing the underlying streams afterwards.
	 *
	 * @param pipe     pipe used to construct the list and process each instance
	 * @param filename path of the gzip-compressed input file
	 * @return the populated instance list
	 * @throws IOException if the file cannot be opened, is not valid gzip data,
	 *                     or cannot be closed
	 */
	private static InstanceList loadInstances(Pipe pipe, String filename) throws IOException {
		InstanceList instances = new InstanceList(pipe);

		// Each stream is declared as its own resource so the file handle is
		// released even if a later wrapper's constructor throws (for example
		// when the file is not gzip data). Text is decoded with the platform
		// default charset, unchanged from the original code.
		try (InputStream file = new FileInputStream(filename);
				InputStream unzipped = new GZIPInputStream(file);
				BufferedReader reader = new BufferedReader(new InputStreamReader(unzipped))) {
			instances.addThruPipe(new LineGroupIterator(reader, BLANK_LINE, true));
		}
		return instances;
	}

	/**
	 * Command-line entry point.
	 *
	 * @param args {@code args[0]} is the training file and {@code args[1]} the
	 *             testing file; both must be gzip-compressed
	 * @throws Exception if loading, training or evaluation fails
	 */
	public static void main (String[] args) throws Exception {
		if (args.length < 2) {
			// Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
			System.err.println("Usage: TrainHMM <training-file.gz> <testing-file.gz>");
			System.exit(1);
		}
		new TrainHMM(args[0], args[1]);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy