
cc.mallet.pipe.TokenSequence2FeatureVectorSequence Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mallet Show documentation
Show all versions of mallet Show documentation
MALLET is a Java-based package for statistical natural language processing,
document classification, clustering, topic modeling, information extraction,
and other machine learning applications to text.
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
package cc.mallet.pipe;
import java.io.*;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.FeatureVectorSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
/**
* Convert the token sequence in the data field of each instance to a feature vector sequence.
@author Andrew McCallum [email protected]
*/
public class TokenSequence2FeatureVectorSequence extends Pipe implements Serializable
{
boolean augmentable; // Create AugmentableFeatureVector's in the sequence
boolean binary; // Create binary (Augmentable)FeatureVector's in the sequence
boolean growAlphabet = true;
public TokenSequence2FeatureVectorSequence (Alphabet dataDict,
boolean binary, boolean augmentable)
{
super (dataDict, null);
this.augmentable = augmentable;
this.binary = binary;
}
public TokenSequence2FeatureVectorSequence (Alphabet dataDict)
{
this (dataDict, false, false);
}
public TokenSequence2FeatureVectorSequence (boolean binary, boolean augmentable)
{
super (new Alphabet(), null);
this.augmentable = augmentable;
this.binary = binary;
}
public TokenSequence2FeatureVectorSequence ()
{
this (false, false);
}
public Instance pipe (Instance carrier)
{
carrier.setData(new FeatureVectorSequence ((Alphabet)getDataAlphabet(),
(TokenSequence)carrier.getData(),
binary, augmentable,
growAlphabet));
return carrier;
}
public void setGrowAlphabet(boolean growAlphabet) {
this.growAlphabet = growAlphabet;
}
// Serialization
private static final long serialVersionUID = 1;
private static final int CURRENT_SERIAL_VERSION = 0;
private void writeObject (ObjectOutputStream out) throws IOException {
out.writeInt (CURRENT_SERIAL_VERSION);
out.writeBoolean(augmentable);
out.writeBoolean(binary);
out.writeBoolean(growAlphabet);
}
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
int version = in.readInt ();
augmentable = in.readBoolean();
binary = in.readBoolean();
// growAlphabet = true;
growAlphabet = in.readBoolean();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy