All downloads are free. Search and download functionality uses the official Maven repository.

cc.mallet.pipe.ValueString2FeatureVector Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
package cc.mallet.pipe;

import cc.mallet.types.*;
import java.io.Serializable;

public class ValueString2FeatureVector extends Pipe implements Serializable {
	
	/**
	 * Builds a pipe whose data alphabet is a fresh {@link Alphabet} that has
	 * been queried, in array order, with every supplied field name.
	 *
	 * <p>Each name is passed to {@code Alphabet.lookupIndex}; presumably this
	 * registers the name when it is not yet present (confirm against
	 * {@code Alphabet}).
	 *
	 * @param fieldNames the field names to look up in the new alphabet; a
	 *        {@code null} array results in a {@link NullPointerException}
	 */
	public ValueString2FeatureVector (String[] fieldNames) {
		Alphabet fieldAlphabet = new Alphabet();

		for (int idx = 0; idx < fieldNames.length; idx++) {
			fieldAlphabet.lookupIndex(fieldNames[idx]);
		}

		// Install the populated alphabet only after every name has been processed.
		this.dataAlphabet = fieldAlphabet;
	}

	/**
	 * Builds a pipe without performing any initialization of its own; only the
	 * no-argument superclass constructor runs.
	 */
	public ValueString2FeatureVector () {
		super();
	}
	
	public Instance pipe(Instance carrier) {

		String data = (String) carrier.getData();
		
		String[] fields = data.trim().split("\\s+");
		double[] values = new double[fields.length];
		
		for (int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy