cc.mallet.pipe.FeatureSequenceConvolution Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mallet Show documentation
Show all versions of mallet Show documentation
MALLET is a Java-based package for statistical natural language processing,
document classification, clustering, topic modeling, information extraction,
and other machine learning applications to text.
The newest version!
/**
*
*/
package cc.mallet.pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.TokenSequence;
/**
 * Pipe that expands a {@link FeatureSequence} with pairwise co-occurrence
 * features ("n choose 2" over the positions of the sequence).
 *
 * <p>The instance's data is replaced by a new sequence containing every
 * original feature followed by one extra feature per position pair
 * {@code (i, j)} with {@code i < j}.
 *
 * @author lmyao
 */
public class FeatureSequenceConvolution extends Pipe {

    /**
     * Creates the pipe, handing a new empty {@link Alphabet} and {@code null}
     * to the {@code Pipe} constructor.
     * NOTE(review): presumably these are the data and target alphabets,
     * respectively - confirm against {@code Pipe}.
     */
    public FeatureSequenceConvolution() {
        super(new Alphabet(), null);
    }

    /**
     * Replaces the carrier's {@link FeatureSequence} with an augmented copy.
     *
     * <p>The new sequence is built on this pipe's data alphabet and holds:
     * <ol>
     *   <li>each object of the incoming sequence, in order;</li>
     *   <li>for every position pair {@code i < j}, a string feature of the
     *       form {@code "<index at i>_<index at j>"}, where the indices are
     *       the values returned by {@code getIndexAtPosition} on the
     *       incoming sequence (not the token text).</li>
     * </ol>
     *
     * <p>If the carrier is locked it is unlocked before the data is swapped,
     * and it is not locked again afterwards.
     *
     * @param carrier instance whose data is cast to {@link FeatureSequence}
     * @return the same {@code carrier}, now holding the augmented sequence
     */
    public Instance pipe(Instance carrier) {
        FeatureSequence source = (FeatureSequence) carrier.getData();
        FeatureSequence augmented = new FeatureSequence((Alphabet) getDataAlphabet());

        final int length = source.getLength();
        final int lastPairStart = length - 1;

        // Pass 1: carry over the original features unchanged.
        int pos = 0;
        while (pos < length) {
            augmented.add(source.getObjectAtPosition(pos));
            pos++;
        }

        // Pass 2: one co-occurrence feature per unordered pair of positions.
        for (int first = 0; first < lastPairStart; first++) {
            for (int second = first + 1; second < length; second++) {
                int firstIndex = source.getIndexAtPosition(first);
                int secondIndex = source.getIndexAtPosition(second);
                // Keep the key typed as Object so the add(Object) overload is used.
                Object pairKey = firstIndex + "_" + secondIndex;
                augmented.add(pairKey);
            }
        }

        // The data is swapped only after making sure the carrier is unlocked.
        if (carrier.isLocked()) {
            carrier.unLock();
        }
        carrier.setData(augmented);
        return carrier;
    }
}