All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.daisy.dotify.common.text.StringSplitter Maven / Gradle / Ivy

The newest version!
package org.daisy.dotify.common.text;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Provides a method for splitting a CharSequence using a regex,
 * where both matching and non-matching subsequences are retained.
 * @author Joel Håkansson
 */
public class StringSplitter {
	private final Pattern pattern;
	
	/**
	 * Creates a splitter whose pattern is compiled from the given regular expression.
	 * @param regex the regular expression to compile
	 * @throws java.util.regex.PatternSyntaxException if the expression's syntax is invalid
	 */
	public StringSplitter(String regex) {
		this.pattern = Pattern.compile(regex);
	}

	/**
	 * Split the input string using the regular expression. Similar to the {@link String#split(String) split}
	 * method in the {@link String} class. However, contrary to {@link String#split(String) split},
	 * all subsequences are returned, even the ones that match. In other words,
	 * the input can be reconstructed from the result.
	 * @param input the String to split
	 * @param pattern the compiled regular expression
	 * @return returns an array of SplitResults that, if put together, contain all the characters from the input.
	 */
	public static SplitResult[] split(CharSequence input, Pattern pattern) {
		ArrayList ret = new ArrayList<>();
		Matcher m = pattern.matcher(input);

		int index = 0;
		while (m.find()) {
			if (m.start()>index) {
				ret.add(new SplitResult(input.subSequence(index, m.start()).toString(), false));
			}
			ret.add(new SplitResult(input.subSequence(m.start(), m.end()).toString(), true));
			index = m.end();
		}
		if (index==0) {
			return new SplitResult[] {new SplitResult(input.toString(), false)};
		}
		// add remaining segment
		if (index




© 2015 - 2025 Weber Informatics LLC | Privacy Policy