All downloads are free. Search and download functionality uses the official Maven repository.

com.noleme.flow.connect.text.transformer.BasicSentenceSplitter Maven / Gradle / Ivy

The newest version!
package com.noleme.flow.connect.text.transformer;

import com.noleme.flow.actor.transformer.Transformer;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * @author Pierre Lecerf ([email protected])
 * Created on 2020/12/17
 */
public class BasicSentenceSplitter implements Transformer>
{
    private final Locale locale;

    /**
     *
     * @param locale
     */
    public BasicSentenceSplitter(Locale locale)
    {
        this.locale = locale;
    }

    @Override
    public List transform(String input)
    {
        List sentences = new ArrayList<>();
        BreakIterator iterator = BreakIterator.getSentenceInstance(this.locale);
        iterator.setText(input);

        int start = iterator.first();
        int end = iterator.next();
        while (end != BreakIterator.DONE)
        {
            String sentence = input.substring(start, end).trim();

            if (sentence.isBlank())
                continue;

            sentences.add(sentence);

            start = end;
            end = iterator.next();
        }

        return sentences;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy