
cc.mallet.share.upenn.ner.LengthBins Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
package cc.mallet.share.upenn.ner;
import java.util.*;
import cc.mallet.pipe.*;
import cc.mallet.types.*;
/**
* A feature approximating string length.
*/
public class LengthBins extends Pipe implements java.io.Serializable {
String name;
int[] bins;
String[] binNames;
/**
* bins contains the maximum sizes of elements in each bin.
*
* For example, passing in {1,3,7} would produce 4 bins, for strings
* of lengths 1, 2-3, 4-7, and 8+.
*/
public LengthBins (String featureName, int[] binMaxes) {
this.name = featureName;
this.bins = binMaxes;
Arrays.sort(bins);
int min = 1;
binNames = new String[bins.length+1];
for (int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy