
// com.asher_stern.crf.smalltests.DemoPennCorpus (artifact page header: Maven / Gradle / Ivy)
package com.asher_stern.crf.smalltests;
import java.io.File;
import java.util.List;
import org.apache.log4j.Level;
import com.asher_stern.crf.postagging.data.LimitedSizePosTagCorpusReader;
import com.asher_stern.crf.postagging.data.penn.PennCorpus;
import com.asher_stern.crf.utilities.TaggedToken;
import com.asher_stern.crf.utilities.log4j.Log4jInit;
public class DemoPennCorpus
{
public static void main(String[] args)
{
try
{
Log4jInit.init(Level.DEBUG);
new DemoPennCorpus().go(new File(args[0]));
}
catch(Throwable t)
{
t.printStackTrace(System.out);
}
}
public void go(File directory)
{
PennCorpus corpus = new PennCorpus(directory);
LimitedSizePosTagCorpusReader reader = new LimitedSizePosTagCorpusReader(corpus.iterator(),10);
while (reader.hasNext())
{
List extends TaggedToken> sentence = reader.next();
StringBuilder sb = new StringBuilder();
for (TaggedToken taggedToken : sentence)
{
sb.append(taggedToken.getToken()).append("/").append( String.format("%-4s", taggedToken.getTag()) ).append(" ");
}
System.out.println(sb.toString());
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy