All downloads are free. The search and download features use the official Maven repository.

cc.mallet.share.upenn.ner.LengthBins Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
package cc.mallet.share.upenn.ner;


import java.util.*;

import cc.mallet.pipe.*;
import cc.mallet.types.*;

/**
 * A feature approximating string length.
 */
public class LengthBins extends Pipe implements java.io.Serializable {

    String name;
    int[] bins;
    String[] binNames;

    /**
     * <p>bins contains the maximum sizes of elements in each bin.
     * <p>For example, passing in {1,3,7} would produce 4 bins, for strings
     * of lengths 1, 2-3, 4-7, and 8+.
     */
    public LengthBins (String featureName, int[] binMaxes) { this.name = featureName; this.bins = binMaxes; Arrays.sort(bins); int min = 1; binNames = new String[bins.length+1]; for (int i=0; i





© 2015 - 2025 Weber Informatics LLC | Privacy Policy