All downloads are free. Search and download functionality uses the official Maven repository.

com.asher_stern.crf.smalltests.DemoPennCorpus Maven / Gradle / Ivy

Go to download

Implementation of linear-chain Conditional Random Fields (CRF) in pure Java

The newest version!
package com.asher_stern.crf.smalltests;

import java.io.File;
import java.util.List;

import org.apache.log4j.Level;

import com.asher_stern.crf.postagging.data.LimitedSizePosTagCorpusReader;
import com.asher_stern.crf.postagging.data.penn.PennCorpus;
import com.asher_stern.crf.utilities.TaggedToken;
import com.asher_stern.crf.utilities.log4j.Log4jInit;

/**
 * Small command-line demo that reads sentences from a {@link PennCorpus}
 * and prints each one to standard output as a line of {@code token/tag} pairs.
 */
public class DemoPennCorpus
{

	/**
	 * Entry point. Initializes log4j at DEBUG level and runs the demo on the
	 * directory named by the first command-line argument.
	 *
	 * @param args command-line arguments; {@code args[0]} is the corpus directory
	 */
	public static void main(String[] args)
	{
		// Without this guard a missing argument only surfaces as a stack trace
		// for ArrayIndexOutOfBoundsException, which tells the user nothing.
		if (args == null || args.length < 1)
		{
			System.out.println("Usage: DemoPennCorpus <penn-corpus-directory>");
			return;
		}

		try
		{
			Log4jInit.init(Level.DEBUG);
			new DemoPennCorpus().go(new File(args[0]));
		}
		catch(Throwable t)
		{
			// Demo program: report any failure on stdout instead of propagating it.
			t.printStackTrace(System.out);
		}

	}

	/**
	 * Iterates over the corpus found in {@code directory} and prints every
	 * sentence the reader yields on its own line. Each token is written as
	 * {@code token/tag}, with the tag left-justified and padded to a minimum
	 * width of four characters, followed by a single space.
	 *
	 * @param directory directory handed to the {@link PennCorpus} constructor
	 */
	public void go(File directory)
	{
		PennCorpus corpus = new PennCorpus(directory);
		// NOTE(review): 10 is presumably the maximum number of sentences the
		// limited-size reader will return - confirm against LimitedSizePosTagCorpusReader.
		LimitedSizePosTagCorpusReader reader = new LimitedSizePosTagCorpusReader(corpus.iterator(),10);
		while (reader.hasNext())
		{
			// Wildcards keep this loop independent of the concrete token/tag types;
			// only their string representations are needed for printing.
			List<? extends TaggedToken<?, ?>> sentence = reader.next();
			StringBuilder sb = new StringBuilder();
			for (TaggedToken<?, ?> taggedToken : sentence)
			{
				sb.append(taggedToken.getToken()).append("/").append( String.format("%-4s", taggedToken.getTag()) ).append(" ");
			}
			System.out.println(sb);
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy