All downloads are free. Search and download functionality uses the official Maven repository.

cc.mallet.pipe.Array2FeatureVector Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */




package cc.mallet.pipe;


import java.util.logging.*;
import java.lang.reflect.Array;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Instance;
import cc.mallet.types.Labeling;
import cc.mallet.util.MalletLogger;

/** Converts a Java array of numerical types to a FeatureVector, where the
    Alphabet is the data array index wrapped in an Integer object.

    @author Jerod Weinman [email protected]
 */
public class Array2FeatureVector extends Pipe {

	/** Logger for this pipe, obtained from MalletLogger under this class's fully-qualified name. */
	private static final Logger logger = MalletLogger.getLogger(Array2FeatureVector.class.getName());

	/**
	 * Creates a pipe whose data alphabet is a newly allocated {@link Alphabet}
	 * constructed with the given capacity.
	 *
	 * @param capacity value forwarded to the {@code Alphabet} constructor
	 */
	public Array2FeatureVector(int capacity) {
		dataAlphabet = new Alphabet(capacity);
	}

	/** Creates a pipe by delegating to the capacity constructor with a capacity of 1000. */
	public Array2FeatureVector() {
		this(1000);
	}

	/**
	 * Creates a pipe that uses the supplied alphabets instead of allocating its own.
	 *
	 * @param dataAlphabet alphabet stored as this pipe's data alphabet
	 * @param targetAlphabet alphabet stored as this pipe's target alphabet
	 */
	public Array2FeatureVector(Alphabet dataAlphabet, Alphabet targetAlphabet) {
		this.targetAlphabet = targetAlphabet;
		this.dataAlphabet = dataAlphabet;
	}

	/** Convert the data in an Instance from an array to a 
	FeatureVector leaving other fields unchanged.

	Instance.getData() must return a numeric array, and it is
	 cast to double[]

	@throws IllegalStateException If Instance.getTarget() is
	not a Labeling
	 */
	public Instance pipe(  Instance carrier )
	throws IllegalStateException
	{

		int dataLength = Array.getLength( carrier.getData() );

		if ( dataLength > dataAlphabet.size() )
			for (int k=dataAlphabet.size() ; k




© 2015 - 2025 Weber Informatics LLC | Privacy Policy