
de.julielab.jpos.pipes.TokenNGramPipe Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-jpos-ae Show documentation
Show all versions of jcore-jpos-ae Show documentation
JPOS MaxEnt POS Tagger and its UIMA wrapper.
/**
* TokenNGramPipe.java
*
* Copyright (c) 2015, JULIE Lab.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser General Public License (LGPL) v3.0
*
* Author: tomanek
*
* Current version: 2.3
* Since version: 2.2
*
* Creation date: Mar 5, 2008
*
* This pipe creates token-level ngrams. The instance is assumed to
* have a TokenSequence in the data field!
**/
package de.julielab.jpos.pipes;
import java.util.ArrayList;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
/**
 * Pipe that decorates every token of an instance with token-level n-gram
 * features.
 *
 * <p>The instance passed to {@link #pipe(Instance)} must carry a
 * {@link TokenSequence} in its data field. For each token position the
 * n-grams are obtained from {@code NGramGenerator.generateTokenNGrams} and
 * each one is added to the token as a feature named
 * {@code "TOK_NGRAM=" + ngram} with value {@code 1.0}.
 */
public class TokenNGramPipe extends Pipe {

    /** Serialization version identifier. */
    private static final long serialVersionUID = 1L;

    /** N-gram sizes handed unchanged to the n-gram generator. */
    private final int[] ngramSizes;

    /**
     * Creates a pipe producing n-grams of the given sizes.
     *
     * @param ngramSizes the n-gram sizes to pass to the generator; the array
     *                   is stored by reference, not copied
     */
    public TokenNGramPipe(final int[] ngramSizes) {
        this.ngramSizes = ngramSizes;
    }

    /**
     * Adds a {@code TOK_NGRAM=...} feature with value {@code 1.0} to each
     * token for every n-gram generated at that token's position.
     *
     * @param carrier instance whose data field holds a {@link TokenSequence};
     *                its tokens are modified in place
     * @return the same {@code carrier} instance that was passed in
     * @throws ClassCastException if the data field is not a
     *                            {@link TokenSequence}
     */
    @Override
    public Instance pipe(final Instance carrier) {
        final TokenSequence tokenSequence = (TokenSequence) carrier.getData();

        // Collect the surface text of all tokens up front; the generator
        // receives the whole array together with the current position.
        final String[] tokenTexts = new String[tokenSequence.size()];
        for (int i = 0; i < tokenSequence.size(); i++) {
            final Token t = tokenSequence.get(i);
            tokenTexts[i] = t.getText();
        }

        // now make new ngram features
        for (int i = 0; i < tokenSequence.size(); i++) {
            final Token token = tokenSequence.get(i);
            // The list must be typed as ArrayList<String>: iterating a raw
            // ArrayList with a String loop variable does not compile.
            // NOTE(review): a fresh generator is created per token as in the
            // original; hoist it only after confirming it is stateless.
            final ArrayList<String> ngrams = (new NGramGenerator())
                    .generateTokenNGrams(tokenTexts, i, ngramSizes);
            for (final String ngram : ngrams) {
                token.setFeatureValue("TOK_NGRAM=" + ngram, 1.0);
            }
        }
        return carrier;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy