All Downloads are FREE. The search and download functionality uses the official Maven repository.

com.asher_stern.crf.smalltests.DemoReadBrownCorpus Maven / Gradle / Ivy

Go to download

Implementation of linear-chain Conditional Random Fields (CRF) in pure Java

The newest version!
package com.asher_stern.crf.smalltests;

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;

import com.asher_stern.crf.postagging.data.brown.BrownCorpusReader;
import com.asher_stern.crf.utilities.TaggedToken;
import com.asher_stern.crf.utilities.log4j.Log4jInit;

/**
 * 
 * @author Asher Stern
 * Date: Nov 3, 2014
 *
 */
public class DemoReadBrownCorpus
{
	public static final int NUMBER_OF_SENTENCES = 10000;

	public static void main(String[] args)
	{
		try
		{
			Log4jInit.init();
			new DemoReadBrownCorpus(args[0]).go();
		}
		catch(Throwable t)
		{
			t.printStackTrace(System.out);
		}

	}
	
	
	
	public DemoReadBrownCorpus(String directoryName)
	{
		super();
		this.directoryName = directoryName;
	}



	public void go()
	{
		tags = new LinkedHashSet();
		Set twoTagsSet = new LinkedHashSet();
		BrownCorpusReader reader = new BrownCorpusReader(directoryName);
		int index=0;
		//while (index> taggedSentence = reader.next();
			++index;
//			StringBuilder sb = new StringBuilder();
			String previousTag = null;
			for (TaggedToken token : taggedSentence)
			{
//				sb.append(token).append(" ");
				tags.add(token.getTag());
				twoTagsSet.add(new TwoTagsDataStructureForDemo(previousTag, token.getTag()));
				previousTag = token.getTag();
			}
//			logger.info(sb.toString());
			if (0==index%10000)
			{
				logger.info(index);
			}
		}
		
		logger.info("Tags:");
		for (String tag : tags)
		{
			logger.info(tag);
		}
		
		logger.info("Number of detected tags = "+tags.size());
		logger.info("Number of sentences = "+index);
		
		logger.info("Number of detected two tags sequences = "+twoTagsSet.size());
		
	}
	
	
	
	
	private final String directoryName;
	
	private Set tags = null;
	
	private static final Logger logger = Logger.getLogger(DemoReadBrownCorpus.class);

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy