// opennlp.tools.ngram.NGramGenerator (Maven / Gradle / Ivy)
/*
* Copyright 2013 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.ngram;
import java.util.ArrayList;
import java.util.List;
/**
 * Generates n-grams over a sequence of tokens or characters using a sliding
 * window, joining the elements of each window with an optional separator, and
 * returns the grams as a list of strings.
 */
public class NGramGenerator {

  /**
   * Creates the n-grams of a token list, each gram being {@code n} consecutive
   * tokens joined by {@code separator}. For example the tokens a, b, c, d with
   * {@code n = 3} and {@code separator = "-"} yield {@code a-b-c} and
   * {@code b-c-d}.
   *
   * @param input the input tokens the output n-grams will be derived from;
   *     must not be {@code null}. Each token is converted with
   *     {@link String#valueOf(Object)}.
   * @param n the number of tokens in the sliding window
   * @param separator the value placed between the tokens of each gram. Pass in
   *     an empty string if no separator is desired; {@code null} is treated
   *     like the empty string.
   * @return a new, modifiable list holding one gram per window position, in
   *     input order; empty if {@code n} is not positive or {@code n} exceeds
   *     the number of tokens
   */
  public static List<String> generate(List<?> input, int n, String separator) {
    List<String> outGrams = new ArrayList<>();
    int size = input.size();
    if (n <= 0) {
      // A window of zero or fewer tokens has no meaningful grams.
      return outGrams;
    }
    String sep = (separator == null) ? "" : separator;
    // The last window starts at size - n; when n > size the loop never runs.
    for (int start = 0; start <= size - n; start++) {
      StringBuilder gram = new StringBuilder();
      for (int x = start; x < start + n; x++) {
        if (x > start) {
          // Only separate neighbours, so nothing has to be trimmed afterwards.
          gram.append(sep);
        }
        gram.append(input.get(x));
      }
      outGrams.add(gram.toString());
    }
    return outGrams;
  }

  /**
   * Creates the n-grams of a character array, each gram being {@code n}
   * consecutive characters joined by {@code separator}. For example the
   * characters a, b, c with {@code n = 2} and {@code separator = "-"} yield
   * {@code a-b} and {@code b-c}.
   *
   * @param input the array of chars to convert to n-grams; must not be
   *     {@code null}
   * @param n the number of chars that each output gram will consist of
   * @param separator the value placed between the chars of each gram. Pass in
   *     an empty string if no separator is desired; {@code null} is treated
   *     like the empty string.
   * @return a new, modifiable list holding one gram per window position, in
   *     input order; empty if {@code n} is not positive or {@code n} exceeds
   *     the array length
   */
  public static List<String> generate(char[] input, int n, String separator) {
    List<String> outGrams = new ArrayList<>();
    int length = input.length;
    if (n <= 0) {
      // A window of zero or fewer chars has no meaningful grams.
      return outGrams;
    }
    String sep = (separator == null) ? "" : separator;
    // The last window starts at length - n; when n > length the loop never runs.
    for (int start = 0; start <= length - n; start++) {
      StringBuilder gram = new StringBuilder();
      for (int x = start; x < start + n; x++) {
        if (x > start) {
          // Only separate neighbours, so nothing has to be trimmed afterwards.
          gram.append(sep);
        }
        gram.append(input[x]);
      }
      outGrams.add(gram.toString());
    }
    return outGrams;
  }
}