All downloads are FREE. The search and download functionality uses the official Maven repository.

cc.mallet.examples.TestCRFPipe Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
package cc.mallet.examples;

import java.io.*;
import java.util.*;
import java.util.regex.*;
import java.util.zip.*;

import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.pipe.tsf.*;
import cc.mallet.types.*;

/**
 * Example driver that pushes a gzip-compressed training file through a chain of
 * feature-extraction pipes and prints the resulting sequences to the file
 * {@code test.out} in the current working directory.
 *
 * <p>All work happens in the constructor; the constructed object carries no state.
 * Both the input file and {@code test.out} are read/written with the platform
 * default charset, exactly as the underlying {@link InputStreamReader} and
 * {@link PrintWriter} constructors used here do.
 */
public class TestCRFPipe {

	/** Name of the file the printed sequences are written to. */
	private static final String OUTPUT_FILENAME = "test.out";

	/**
	 * Builds the pipe chain, runs every instance found in {@code trainingFilename}
	 * through it, and writes the printed sequences to {@code test.out}.
	 *
	 * <p>{@code test.out} is created (or truncated) before the training file is
	 * opened, so it exists even when reading the training file fails. Both the
	 * output writer and the input reader are closed before this constructor
	 * returns, whether it completes normally or throws.
	 *
	 * @param trainingFilename path of the gzip-compressed training file
	 * @throws IOException if {@code test.out} cannot be created, or the training
	 *         file cannot be opened or is not valid gzip data
	 */
	public TestCRFPipe(String trainingFilename) throws IOException {

		PrintWriter out = new PrintWriter(OUTPUT_FILENAME);
		try {
			// Offset sets handed to OffsetConjunctions.
			// NOTE(review): presumably token positions relative to the current
			// token (previous, next, previous two) -- confirm against MALLET docs.
			int[][] conjunctions = new int[][] {
				{ -1 },
				{ 1 },
				{ -2, -1 },
			};

			ArrayList<Pipe> pipes = new ArrayList<Pipe>();
			pipes.add(new SimpleTaggerSentence2TokenSequence());
			pipes.add(new OffsetConjunctions(conjunctions));
			pipes.add(new TokenTextCharSuffix("C1=", 1));
			pipes.add(new TokenTextCharSuffix("C2=", 2));
			pipes.add(new TokenTextCharSuffix("C3=", 3));
			pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
			pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
			pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
			pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile("\\$.*")));
			pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
			pipes.add(new TokenSequence2FeatureVectorSequence());
			// Last pipe in the chain is given the writer on test.out.
			pipes.add(new SequencePrintingPipe(out));

			Pipe pipe = new SerialPipes(pipes);
			InstanceList trainingInstances = new InstanceList(pipe);

			BufferedReader reader = openGzipReader(trainingFilename);
			try {
				// The pattern matches empty or whitespace-only lines.
				// NOTE(review): presumably those lines delimit instance groups and
				// 'true' drops the delimiter line itself -- confirm against
				// LineGroupIterator's documentation.
				trainingInstances.addThruPipe(
					new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
			} finally {
				reader.close();
			}
		} finally {
			// PrintWriter.close() flushes and never throws, so it cannot mask an
			// exception propagating out of the try block.
			out.close();
		}
	}

	/**
	 * Opens {@code filename} as gzip-compressed text.
	 *
	 * <p>If wrapping the file stream fails (for example, the file is not gzip
	 * data), the underlying file stream is closed before the exception propagates.
	 *
	 * @param filename path of the gzip-compressed file
	 * @return a buffered reader over the decompressed content; the caller must close it
	 * @throws IOException if the file cannot be opened or its gzip header is invalid
	 */
	private static BufferedReader openGzipReader(String filename) throws IOException {
		FileInputStream fileIn = new FileInputStream(filename);
		boolean opened = false;
		try {
			BufferedReader reader =
				new BufferedReader(new InputStreamReader(new GZIPInputStream(fileIn)));
			opened = true;
			return reader;
		} finally {
			if (!opened) {
				fileIn.close();
			}
		}
	}

	/**
	 * Command-line entry point.
	 *
	 * @param args {@code args[0]} is the path of the gzip-compressed training file
	 * @throws Exception if processing the training file fails
	 */
	public static void main (String[] args) throws Exception {
		if (args.length < 1) {
			System.err.println("Usage: TestCRFPipe <training-file.gz>");
			System.exit(1);
		}
		new TestCRFPipe(args[0]);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy