org.eclipse.rdf4j.common.text.StringUtil Maven / Gradle / Ivy

Go to download
/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *******************************************************************************/

package org.eclipse.rdf4j.common.text;

import java.util.ArrayList;

public class StringUtil {

	/**
	 * The minimum length of initial text.
	 */
	private static final int MIN_INITIAL_TEXT_LENGTH = 3;

	/**
	 * The maximum length of derived initial text.
	 */
	private static final int MAX_INITIAL_TEXT_LENGTH = 250;

	/**
	 * Texts longer than this many characters that contain no space separator are considered garbage.
	 */
	private static final int MAX_UNSPACED_TEXT_LENGTH = 30;

	/**
	 * Substitutes every occurrence of String "olds" by String "news" in String "text". Occurrences are
	 * located from left to right and do not overlap; replaced text is not rescanned. The name of this
	 * function stems from awk.
	 * 
	 * @param olds
	 *        The String to be substituted. When null or empty, nothing is substituted.
	 * @param news
	 *        The String that is the new content.
	 * @param text
	 *        The String in which the substitution is done; may be null.
	 * @return The result String containing the substitutions; if no substitutions were made, the result is
	 *         'text' itself (which is null when 'text' is null).
	 */
	public static String gsub(String olds, String news, String text) {
		if (olds == null || olds.length() == 0) {
			// Nothing to substitute.
			return text;
		}
		if (text == null) {
			return null;
		}

		// Search for the first occurrence of 'olds'.
		int oldsIndex = text.indexOf(olds);
		if (oldsIndex == -1) {
			// Nothing to substitute; hand back the very same object.
			return text;
		}

		// We're going to do some substitutions.
		StringBuilder buf = new StringBuilder(text.length());
		int prevIndex = 0;

		while (oldsIndex >= 0) {
			// Copy the text between the previous and the current occurrence
			// directly from 'text', without creating a temporary substring.
			buf.append(text, prevIndex, oldsIndex);

			// Then add the substitution.
			buf.append(news);

			// Continue searching right after the occurrence just replaced.
			prevIndex = oldsIndex + olds.length();
			oldsIndex = text.indexOf(olds, prevIndex);
		}

		// Add the part after the last occurrence.
		buf.append(text, prevIndex, text.length());

		return buf.toString();
	}

	/**
	 * Returns all text occurring after the first occurrence of the specified separator character. The
	 * entire string is returned when the separator character does not occur, and also when its first
	 * occurrence is the last character of the string (i.e. when nothing follows it).
	 * 
	 * @param string
	 *        The string of which the substring needs to be determined; must not be null.
	 * @param separatorChar
	 *        The character to look for.
	 * @return All text occurring after the separator character, or the entire string when the character does
	 *         not occur or is not followed by any text.
	 */
	public static String getAllAfter(String string, char separatorChar) {
		int index = string.indexOf(separatorChar);
		if (index < 0 || index == string.length() - 1) {
			return string;
		}
		return string.substring(index + 1);
	}

	/**
	 * Returns all text occurring before the first occurrence of the specified separator character. The
	 * entire string is returned when the separator character does not occur, and also when it is the first
	 * character of the string (i.e. when nothing precedes it), mirroring {@link #getAllAfter(String, char)}.
	 * 
	 * @param string
	 *        The string of which the substring needs to be determined; must not be null.
	 * @param separatorChar
	 *        The character to look for.
	 * @return All text occurring before the separator character, or the entire string when the character does
	 *         not occur or is not preceded by any text.
	 */
	public static String getAllBefore(String string, char separatorChar) {
		int index = string.indexOf(separatorChar);
		// The end index of substring() is exclusive, so 'index' itself is the
		// correct bound: it excludes the separator but keeps the character before it.
		return index <= 0 ? string : string.substring(0, index);
	}

	/**
	 * Encodes an array of Strings into a single String that can be decoded to the original array using
	 * {@link #decodeArray(String)}. Useful for e.g. storing an array of Strings as a single entry in a
	 * Preferences node.
	 * <p>
	 * Every underscore in an item is escaped as "__" and items are separated by "_.". Note that empty items
	 * are not preserved by {@link #decodeArray(String)}.
	 * 
	 * @param array
	 *        The Strings to encode; must not be null.
	 * @return The encoded representation of the array; the empty string for an empty array.
	 */
	public static String encodeArray(String[] array) {
		StringBuilder buffer = new StringBuilder();
		int nrItems = array.length;

		for (int i = 0; i < nrItems; i++) {
			// Escape underscores so that they cannot be mistaken for a separator.
			buffer.append(gsub("_", "__", array[i]));

			if (i < nrItems - 1) {
				buffer.append("_.");
			}
		}

		return buffer.toString();
	}

	/**
	 * Decodes a String generated by {@link #encodeArray(String[])}. Empty items are skipped.
	 * <p>
	 * The encoded String is scanned from left to right so that an escaped underscore ("__") that happens to
	 * be followed by a dot is not mistaken for an item separator ("_.").
	 * 
	 * @param encodedArray
	 *        The encoded String; must not be null.
	 * @return The decoded, non-empty items in their original order.
	 */
	public static String[] decodeArray(String encodedArray) {
		ArrayList<String> list = new ArrayList<String>();
		StringBuilder item = new StringBuilder();
		int length = encodedArray.length();

		int i = 0;
		while (i < length) {
			char c = encodedArray.charAt(i);
			boolean hasNext = i + 1 < length;

			if (c == '_' && hasNext && encodedArray.charAt(i + 1) == '_') {
				// Escaped underscore.
				item.append('_');
				i += 2;
			}
			else if (c == '_' && hasNext && encodedArray.charAt(i + 1) == '.') {
				// Item separator: finish the current item.
				if (item.length() > 0) {
					list.add(item.toString());
				}
				item.setLength(0);
				i += 2;
			}
			else {
				// Ordinary character (or a stray underscore, which is kept as-is).
				item.append(c);
				i++;
			}
		}

		if (item.length() > 0) {
			list.add(item.toString());
		}

		return list.toArray(new String[list.size()]);
	}

	/**
	 * Derives the initial text from the supplied text. The returned text excludes leading whitespace and
	 * other special characters and is useful for display purposes (e.g. previews).
	 * <p>
	 * The text is scanned for runs that begin with a start character (a letter, digit, opening bracket or
	 * opening quote) and continue with letters, digits, spaces and punctuation, up to
	 * MAX_INITIAL_TEXT_LENGTH characters. The first run of at least MIN_INITIAL_TEXT_LENGTH characters whose
	 * trimmed form is not garbage according to {@link #isGarbageText(String)} is returned.
	 * 
	 * @param text
	 *        The text to derive the initial text from; must not be null.
	 * @return The derived initial text, or null when no suitable text was found.
	 */
	public static String deriveInitialText(String text) {
		String result = null;

		int startIdx = 0; // index of the first text character
		int endIdx = 0; // index of the first char after the end of the text
		int textLength = text.length();

		while (startIdx < textLength && result == null) {
			// Resume scanning where the previous candidate ended.
			startIdx = endIdx;

			// skip until first/next text character
			while (startIdx < textLength && !isInitialTextStartChar(text.charAt(startIdx))) {
				startIdx++;
			}

			// try to find an initial text of a sufficient length
			endIdx = startIdx + 1;
			while (endIdx < textLength && ((endIdx - startIdx) < MAX_INITIAL_TEXT_LENGTH)
					&& isInitialTextChar(text.charAt(endIdx)))
			{
				endIdx++;
			}

			if (endIdx - startIdx >= MIN_INITIAL_TEXT_LENGTH) {
				// get candidate text. The text is trimmed to remove any spaces
				// at the end. This will prevent texts like "A " to be accepted.
				String candidateText = text.substring(startIdx, endIdx).trim();
				if (!isGarbageText(candidateText)) {
					result = candidateText;
				}
			}
		}

		return result;
	}

	/**
	 * Determines whether the supplied text is garbage. Texts whose trimmed length is shorter than
	 * MIN_INITIAL_TEXT_LENGTH, and texts longer than 30 characters that don't contain a single space
	 * separator character, are considered to be garbage.
	 * 
	 * @param text
	 *        The text to check; must not be null.
	 * @return true if the text is considered garbage, false otherwise.
	 */
	public static boolean isGarbageText(String text) {
		if (text.trim().length() < MIN_INITIAL_TEXT_LENGTH) {
			return true;
		}

		if (text.length() <= MAX_UNSPACED_TEXT_LENGTH) {
			return false;
		}

		// A long text is acceptable only if it contains at least one space separator.
		for (int i = 0; i < text.length(); i++) {
			if (Character.getType(text.charAt(i)) == Character.SPACE_SEPARATOR) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Appends the specified character n times to the supplied StringBuilder. Nothing is appended when n is
	 * zero or negative.
	 * 
	 * @param c
	 *        The character to append.
	 * @param n
	 *        The number of times the character should be appended.
	 * @param sb
	 *        The StringBuilder to append the character(s) to.
	 */
	public static void appendN(char c, int n, StringBuilder sb) {
		for (int i = n; i > 0; i--) {
			sb.append(c);
		}
	}

	/**
	 * Removes the double quote from the start and end of the supplied string if it starts and ends with this
	 * character. This method does not create a new string if text doesn't start and end with double
	 * quotes, the text object itself is returned in that case.
	 * 
	 * @param text
	 *        The string to remove the double quotes from; must not be null.
	 * @return The trimmed string, or a reference to text if it did not start and end with double
	 *         quotes.
	 */
	public static String trimDoubleQuotes(String text) {
		int textLength = text.length();

		// At least two characters are required, otherwise a single '"' would
		// count as both the opening and the closing quote.
		if (textLength >= 2 && text.charAt(0) == '"' && text.charAt(textLength - 1) == '"') {
			return text.substring(1, textLength - 1);
		}

		return text;
	}

	// A nice overview of Unicode character categories can be found at:
	// http://oss.software.ibm.com/cgi-bin/icu/ub

	/**
	 * Checks whether an initial text may start with the supplied character: any letter, a decimal digit, or
	 * start/initial-quote punctuation.
	 */
	private static boolean isInitialTextStartChar(char c) {
		int charType = Character.getType(c);

		return charType == Character.UPPERCASE_LETTER || charType == Character.LOWERCASE_LETTER
				|| charType == Character.TITLECASE_LETTER || charType == Character.MODIFIER_LETTER
				|| charType == Character.OTHER_LETTER || charType == Character.DECIMAL_DIGIT_NUMBER
				|| charType == Character.START_PUNCTUATION || charType == Character.INITIAL_QUOTE_PUNCTUATION;
	}

	/**
	 * Checks whether the supplied character may occur inside an initial text: every character that may
	 * start one, plus space separators and the remaining punctuation categories.
	 */
	private static boolean isInitialTextChar(char c) {
		if (isInitialTextStartChar(c)) {
			return true;
		}

		int charType = Character.getType(c);

		return charType == Character.SPACE_SEPARATOR || charType == Character.CONNECTOR_PUNCTUATION
				|| charType == Character.DASH_PUNCTUATION || charType == Character.END_PUNCTUATION
				|| charType == Character.FINAL_QUOTE_PUNCTUATION || charType == Character.OTHER_PUNCTUATION;
	}

	/**
	 * Concatenates a number of Strings. This implementation uses a StringBuilder that is pre-sized to the
	 * total length of the result.
	 * 
	 * @param strings
	 *        the Strings to concatenate; none of them may be null.
	 * @return a String that is the result of concatenating the input strings; the empty string when no
	 *         strings are supplied.
	 */
	public static String concat(String... strings) {
		// Determine total length of concatenated string to prevent expensive char
		// array copies for growing StringBuilder's internal array
		int totalLength = 0;
		for (String s : strings) {
			totalLength += s.length();
		}

		StringBuilder result = new StringBuilder(totalLength);
		for (String string : strings) {
			result.append(string);
		}

		return result.toString();
	}
}