
cc.mallet.pipe.StringList2FeatureSequence Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
package cc.mallet.pipe;
import java.util.ArrayList;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;
/**
 * Convert a list of strings into a feature sequence.
 *
 * <p>The instance's data is expected to be an {@link ArrayList} of tokens. Each token is
 * added, in list order, to a new {@link FeatureSequence} created over this pipe's data
 * alphabet, and that sequence then replaces the instance's data.
 */
public class StringList2FeatureSequence extends Pipe {

    /**
     * Accumulated time, in nanoseconds, spent in {@link #pipe(Instance)} calls that
     * completed a conversion. Calls rejected because of an unexpected data type are
     * not counted.
     */
    public long totalNanos = 0;

    /**
     * Creates a pipe using the supplied alphabet.
     *
     * @param dataDict alphabet forwarded as the first argument of the {@code Pipe}
     *                 constructor (presumably the data alphabet -- confirm against
     *                 {@code Pipe})
     */
    public StringList2FeatureSequence (Alphabet dataDict) {
        super (dataDict, null);
    }

    /** Creates a pipe backed by a newly created, empty {@link Alphabet}. */
    public StringList2FeatureSequence () {
        super (new Alphabet(), null);
    }

    /**
     * Replaces the carrier's {@code ArrayList} data with an equivalent
     * {@link FeatureSequence}.
     *
     * <p>If the data is not an {@code ArrayList} (including {@code null}), a message is
     * written to {@code System.err} and the carrier is returned unmodified.
     *
     * @param carrier instance whose data should be converted
     * @return the same {@code carrier} object that was passed in
     */
    public Instance pipe (Instance carrier) {
        long start = System.nanoTime();

        // Check the payload type up front instead of catching ClassCastException:
        // this keeps a ClassCastException raised deeper in the conversion from being
        // misreported as a wrong payload type.
        Object data = carrier.getData();
        if (!(data instanceof ArrayList)) {
            Object found = (data == null) ? "null" : data.getClass();
            System.err.println("Expecting ArrayList, found " + found);
            return carrier;
        }

        ArrayList<?> tokens = (ArrayList<?>) data;
        FeatureSequence featureSequence =
            new FeatureSequence ((Alphabet) getDataAlphabet(), tokens.size());
        for (Object token : tokens) {
            featureSequence.add (token);
        }
        carrier.setData(featureSequence);

        // Only successful conversions contribute to the running total.
        totalNanos += System.nanoTime() - start;
        return carrier;
    }

    static final long serialVersionUID = 1;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy