// justhalf.nlp.sentencesplitter.NLP4JSentenceSplitter (Maven / Gradle / Ivy)
package justhalf.nlp.sentencesplitter;
import java.util.ArrayList;
import java.util.List;
import edu.emory.mathcs.nlp.component.template.node.NLPNode;
import edu.emory.mathcs.nlp.tokenization.EnglishTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
/**
* An implementation of {@link SentenceSplitter} using NLP4J
*/
public class NLP4JSentenceSplitter implements SentenceSplitter {
public edu.emory.mathcs.nlp.tokenization.Tokenizer nlp4jTokenizer;
public NLP4JSentenceSplitter() {
nlp4jTokenizer = new EnglishTokenizer();
}
@Override
public boolean isThreadSafe() {
return true;
}
@Override
public String[] splitToString(String input) {
List sentences = split(input);
String[] result = new String[sentences.size()];
for(int i=0; i split(String input) {
List sentences = nlp4jTokenizer.segmentize(input);
List result = new ArrayList();
int lastEnd = 0;
String between = "";
for(NLPNode[] tokens: sentences){
CoreLabel sentence = new CoreLabel();
int start = tokens[0].getStartOffset();
int end = tokens[tokens.length-1].getEndOffset();
between = input.substring(lastEnd, start);
if(result.size() > 0){
result.get(result.size()-1).setAfter(between);
}
sentence.setBefore(between);
sentence.setBeginPosition(start);
sentence.setEndPosition(end);
String sentenceText = input.substring(start, end);
sentence.setOriginalText(sentenceText);
sentence.setWord(sentenceText);
sentence.setValue(sentenceText);
result.add(sentence);
}
between = input.substring(lastEnd);
if(result.size() > 0){
result.get(result.size()-1).setAfter(between);
}
return result;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy