All downloads are free. Search and download functionality uses the official Maven repository.

cc.mallet.pipe.StringList2FeatureSequence Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
package cc.mallet.pipe;

import java.util.ArrayList;

import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;

/**
 * Convert a list of strings into a feature sequence.
 *
 * <p>The pipe expects the instance data to be an {@code ArrayList} of tokens. Every token is
 * added, in list order, to a new {@link FeatureSequence} constructed with this pipe's data
 * alphabet, and that sequence then replaces the instance data.
 */

public class StringList2FeatureSequence extends Pipe {

	/**
	 * Accumulated wall-clock time, in nanoseconds, spent in calls to
	 * {@link #pipe(Instance)} that completed a conversion. Calls that find
	 * unusable data do not contribute.
	 */
	public long totalNanos = 0;

	/**
	 * Creates a pipe that uses the given alphabet as its data alphabet.
	 *
	 * @param dataDict alphabet handed to the superclass as the data alphabet;
	 *                 {@code null} is passed for the second superclass argument
	 */
	public StringList2FeatureSequence (Alphabet dataDict) {
		super (dataDict, null);
	}

	/**
	 * Creates a pipe with a freshly constructed, empty data alphabet.
	 */
	public StringList2FeatureSequence () {
		super(new Alphabet(), null);
	}

	/**
	 * Replaces the carrier's {@code ArrayList} data with a {@link FeatureSequence}
	 * holding the same elements in the same order.
	 *
	 * <p>If the data is {@code null} or not an {@code ArrayList}, a message is
	 * written to {@code System.err} and the carrier is returned unchanged.
	 *
	 * @param carrier the instance whose data should be converted
	 * @return the same {@code carrier} object that was passed in
	 */
	public Instance pipe (Instance carrier) {

		long start = System.nanoTime();

		// Check the type explicitly instead of catching ClassCastException:
		// a catch around the whole conversion would also hide unrelated cast
		// failures from the calls below and report them with a misleading message.
		Object data = carrier.getData();
		if (! (data instanceof ArrayList)) {
			System.err.println("Expecting ArrayList, found "
				+ (data == null ? "null" : data.getClass()));
			return carrier;
		}

		ArrayList<?> tokens = (ArrayList<?>) data;
		FeatureSequence featureSequence =
			new FeatureSequence ((Alphabet) getDataAlphabet(), tokens.size());
		for (Object token : tokens) {
			featureSequence.add (token);
		}
		carrier.setData(featureSequence);

		// Only completed conversions are counted in the timing total.
		totalNanos += System.nanoTime() - start;

		return carrier;
	}

	static final long serialVersionUID = 1;
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy