All Downloads are FREE. The search and download functionality uses the official Maven repository.

justhalf.nlp.sentencesplitter.NLP4JSentenceSplitter Maven / Gradle / Ivy

package justhalf.nlp.sentencesplitter;

import java.util.ArrayList;
import java.util.List;

import edu.emory.mathcs.nlp.component.template.node.NLPNode;
import edu.emory.mathcs.nlp.tokenization.EnglishTokenizer;
import edu.stanford.nlp.ling.CoreLabel;

/**
 * An implementation of {@link SentenceSplitter} using NLP4J
 */
public class NLP4JSentenceSplitter implements SentenceSplitter {
	
	public edu.emory.mathcs.nlp.tokenization.Tokenizer nlp4jTokenizer;

	public NLP4JSentenceSplitter() {
		nlp4jTokenizer = new EnglishTokenizer();
	}

	@Override
	public boolean isThreadSafe() {
		return true;
	}

	@Override
	public String[] splitToString(String input) {
		List sentences = split(input);
		String[] result = new String[sentences.size()];
		for(int i=0; i split(String input) {
		List sentences = nlp4jTokenizer.segmentize(input);
		List result = new ArrayList();
		int lastEnd = 0;
		String between = "";
		for(NLPNode[] tokens: sentences){
			CoreLabel sentence = new CoreLabel();
			int start = tokens[0].getStartOffset();
			int end = tokens[tokens.length-1].getEndOffset();
			between = input.substring(lastEnd, start);
			if(result.size() > 0){
				result.get(result.size()-1).setAfter(between);
			}
			sentence.setBefore(between);
			sentence.setBeginPosition(start);
			sentence.setEndPosition(end);
			String sentenceText = input.substring(start, end);
			sentence.setOriginalText(sentenceText);
			sentence.setWord(sentenceText);
			sentence.setValue(sentenceText);
			result.add(sentence);
		}
		between = input.substring(lastEnd);
		if(result.size() > 0){
			result.get(result.size()-1).setAfter(between);
		}
		return result;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy