com.googlecode.kevinarpe.papaya.StringUtils Maven / Gradle / Ivy
package com.googlecode.kevinarpe.papaya;
/*
* #%L
* This file is part of Papaya.
* %%
* Copyright (C) 2013 - 2014 Kevin Connor ARPE ([email protected])
* %%
* Papaya is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GPL Classpath Exception:
* This project is subject to the "Classpath" exception as provided in
* the LICENSE file that accompanied this code.
*
* Papaya is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Papaya. If not, see .
* #L%
*/
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.googlecode.kevinarpe.papaya.annotation.FullyTested;
import com.googlecode.kevinarpe.papaya.annotation.NotFullyTested;
import com.googlecode.kevinarpe.papaya.argument.ArrayArgs;
import com.googlecode.kevinarpe.papaya.argument.IntArgs;
import com.googlecode.kevinarpe.papaya.argument.ObjectArgs;
import com.googlecode.kevinarpe.papaya.argument.StringArgs;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
/**
* Collection of static methods to manipulate {@link String} references.
*
* @author Kevin Connor ARPE ([email protected])
*/
public final class StringUtils {
// Disable default constructor
private StringUtils() {
}
/**
* This is the current newline: {@code System.getProperty("line.separator")}. This will differ
* between operating systems, such as UNIX variants and Microsoft Windows.
*
* It is also available when calling {@link String#format(String, Object...)} and using
* {@code %n} in the format string.
*
* @see #NEW_LINE
* @see #UNIX_NEW_LINE
* @see #WINDOWS_NEW_LINE
* @see #NEW_LINE_REGEX
*/
public static final String NEW_LINE;
/**
* This is the current newline as a compiled regular expression:
* {@code Pattern.compile(Pattern.quote(NEW_LINE))}
*
* @see #NEW_LINE
*/
public static final Pattern NEW_LINE_REGEX;
/**
* A regular expression pattern to match either UNIX or Windows newlines:
* {@code Pattern.compile("\r?\n")}
*
* This is useful when splitting text to its lines from a file of unknown source.
*
* @see #NEW_LINE_REGEX
*/
public static final Pattern UNIX_OR_WINDOWS_NEW_LINE_REGEX;
/**
* This is the newline used by UNIX variants: "\n". This value will never differ between
* operating systems.
*/
public static final String UNIX_NEW_LINE;
/**
* This is the newline used by Microsoft Windows: "\r\n". This value will never differ between
* operating systems.
*/
public static final String WINDOWS_NEW_LINE;
//private static final Pattern _NEW_LINE_REGEX;
static {
NEW_LINE = System.getProperty("line.separator");
UNIX_NEW_LINE = "\n";
WINDOWS_NEW_LINE = "\r\n";
NEW_LINE_REGEX = Pattern.compile(NEW_LINE);
UNIX_OR_WINDOWS_NEW_LINE_REGEX = Pattern.compile("\r?\n");
}
/**
* Strictly converts a string to a boolean value. Ignoring case, only {@code "true"} and
* {@code "false"} are accepted as valid values. The built-in Java method,
* {@link Boolean#parseBoolean(String)}, is more lenient, treating {@code "true"} (or case
* variants) as {@code true} and all other values, including {@code null}, as {@code false}.
*
* @param str
* input value
*
* @return boolean value
*
* @throws NullPointerException
* if {@code str} is {@code null}
* @throws IllegalArgumentException
* if {@code str} is not "true" or "false", ignoring case
*
* @see FuncUtils#PARSE_BOOLEAN_FROM_STRING
*/
@FullyTested
public static boolean parseBoolean(String str) {
ObjectArgs.checkNotNull(str, "str");
if (str.equalsIgnoreCase("true")) {
return true;
}
if (str.equalsIgnoreCase("false")) {
return false;
}
throw new IllegalArgumentException(String.format(
"Failed to convert string to boolean: '%s'", str));
}
/**
* Removes all leading whitespace chars. To be precise, "leading" is always defined as
* starting from index zero. This terminology may be backwards for right-to-left languages
* such as Hebrew, Arabic, and Farsi.
*
* Whitespace is defined as {@link Character#isWhitespace(char)}, which will include all
* Unicode whitespace, such as the Japanese full-width space.
*
* If zero leading whitespace chars are found, the input string reference is returned. If all
* characters in input string are whitespace, an empty string is returned.
*
* @param str
* a string reference
*
* @return string reference without leading whitespace chars
*
* @throws NullPointerException
* if {@code str} is {@code null}
*
* @see #trimWhitespaceSuffix(String)
*/
@FullyTested
public static String trimWhitespacePrefix(String str) {
ObjectArgs.checkNotNull(str, "str");
final int len = str.length();
int i = 0;
for (; i < len; ++i) {
char ch = str.charAt(i);
if (!Character.isWhitespace(ch)) {
break;
}
}
if (0 == i) {
return str;
}
if (len == i) {
return "";
}
String s2 = str.substring(i);
return s2;
}
// TODO: trimPrefixByCallback
// TODO: trimSuffixByCallback
/**
* Removes all trailing whitespace chars. To be precise, "trailing" is always defined as
* starting from the last index. This terminology may be backwards for right-to-left languages
* such as Hebrew, Arabic, and Farsi.
*
* Whitespace is defined by {@link Character#isWhitespace(char)}, which includes all special
* whitespace chars used in East Asian languages.
*
* If zero leading whitespace chars are found, the input string reference is returned. If all
* characters in input string are whitespace, an empty string is returned.
*
* @param str
* a string reference
*
* @return string reference without leading whitespace chars
*
* @throws NullPointerException
* if {@code str} is {@code null}
*
* @see #trimWhitespacePrefix(String)
*/
// TODO: This is replaced by Guava's CharMatchers.
@FullyTested
public static String trimWhitespaceSuffix(String str) {
ObjectArgs.checkNotNull(str, "str");
final int len = str.length();
int i = len - 1;
for (; i >= 0; --i) {
char ch = str.charAt(i);
if (!Character.isWhitespace(ch)) {
break;
}
}
if (len - 1 == i) {
return str;
}
if (-1 == i) {
return "";
}
String s2 = str.substring(0, i + 1);
return s2;
}
// /**
// * Tests if an input string is one or more whitespace chars. Empty strings are not
// * considered whitespace.
// *
// * Whitespace is defined by {@link Character#isWhitespace(char)}, which includes all special
// * whitespace chars used in East Asian languages.
// *
// * @param str input string
// * @return true if only whitespace
// * @throws NullPointerException if {@code str} is {@code null}
// */
// public static boolean isOnlyWhitespace(String str) {
// ObjectArgs.checkNotNull(str, "str");
// int len = str.length();
// if (0 == len) {
// return false;
// }
// boolean b = isEmptyOrWhitespace(str);
// return b;
// }
/**
* Tests if an input string is empty or only whitespace chars.
*
* Whitespace is defined by {@link Character#isWhitespace(char)}, which includes all special
* whitespace chars used in East Asian languages.
*
* @param str
* input string
*
* @return true if empty or all whitespace
*
* @throws NullPointerException
* if {@code str} is {@code null}
*/
@FullyTested
public static boolean isEmptyOrWhitespace(String str) {
ObjectArgs.checkNotNull(str, "str");
int len = str.length();
for (int i = 0; i < len; ++i) {
char ch = str.charAt(i);
if (!Character.isWhitespace(ch)) {
return false;
}
}
return true;
}
/**
* This method is modeled after Left() from Visual Basic. Copies a specified number of leading
* chars from a string. To be precise, "leading" is always defined as starting from index
* zero. This terminology may be backwards for right-to-left languages such as Hebrew, Arabic,
* and Farsi.
*
* If {@code count == 0}, the empty string is retured. If {@code count >= str.length()}, the
* input string reference is returned.
*
* @param str
* input string reference to process
* @param count
* number of chars to copy starting from first index.
* Must be >= 0, and may be greater than input length
*
* @return string reference created from leading chars
*
* @throws NullPointerException
* if {@code str} is {@code null}
* @throws IllegalArgumentException
* if {@code count} is negative
*/
@FullyTested
public static String substringPrefix(String str, int count) {
ObjectArgs.checkNotNull(str, "str");
IntArgs.checkNotNegative(count, "count");
int len = str.length();
if (count >= len) {
return str;
}
String str2 = str.substring(0, count);
return str2;
}
/**
* This method is modeled after Right() from Visual Basic. Copies a specified number of
* trailing chars from a string. To be precise, "trailing" is always defined as starting from
* the last index. This terminology may be backwards for right-to-left languages such as
* Hebrew, Arabic, and Farsi.
*
* If {@code count == 0}, the empty string is retured. If {@code count >= str.length()}, the
* input string reference is returned.
*
* @param str
* input string reference to process
* @param count
* number of chars to copy starting from last index.
* Must be >= 0, and may be greater than input length
*
* @return string reference created from trailing chars
*
* @throws NullPointerException
* if {@code str} is {@code null}
* @throws IllegalArgumentException
* if {@code count} is negative
*/
@FullyTested
public static String substringSuffix(String str, int count) {
ObjectArgs.checkNotNull(str, "str");
IntArgs.checkNotNegative(count, "count");
int len = str.length();
if (count >= len) {
return str;
}
String str2 = str.substring(len - count, len);
return str2;
}
// public static String replaceAll(String haystack, Pattern regex, String replacement) {
// ObjectArgs.checkNotNull(haystack, "haystack");
// ObjectArgs.checkNotNull(regex, "regex");
// ObjectArgs.checkNotNull(replacement, "replacement");
// Matcher m = regex.matcher(haystack);
// String s2 = m.replaceAll(replacement);
// return s2;
// }
// TODO: replaceFirstN
// TODO: replaceLast
// TODO: replaceLastN
// // Ref: http://stackoverflow.com/a/6417487/257299
// public static int findLastPatternMatch(String haystack, Pattern regex) {
// ObjectArgs.checkNotNull(haystack, "haystack");
// ObjectArgs.checkNotNull(regex, "regex");
// Matcher m = regex.matcher(haystack);
// int haystackLen = haystack.length();
// int index = _findLastPatternMatchCore(0, haystackLen, m);
// return index;
// }
//
// private static int _findLastPatternMatchCore(int start, int end, Matcher m) {
// if (start > end) {
// return -1;
// }
// final int pivot = ((end - start) / 2) + start;
// if (m.find(pivot)) {
// int index = m.start();
// if (index + 1 > end) {
// return index;
// }
// int result = _findLastPatternMatchCore(1 + index, end, m);
// return (-1 == result ? index : result);
// }
// else if (m.find(start)) {
// int index = m.start();
// if (index + 1 > pivot) {
// return index;
// }
// int result = _findLastPatternMatchCore(1 + index, pivot, m);
// return (-1 == result ? index : result);
// }
// return -1;
// }
//
// // TODO: Split by new line
// // TODO: Convert new lines
// // TODO: Add line prefix / suffix
//
/**
* This is a convenience method for {@link #removeByIndexAndCount(String, int, int)}
* where {@code count = 1}.
*/
@FullyTested
public static String removeCharAt(String str, int index) {
final int count = 1;
String x = removeByIndexAndCount(str, index, count);
return x;
}
/**
* Removes characters from a string by creating a new string and copying remaining characters
* from the source string. To be precise, using the term "remove" is a misnomer, as Java
* strings cannot change size; they are immutable.
*
* If {@code count == 0}, the input string reference is returned. No copy is made.
*
* @param str
* a string reference. May be empty, but not {@code null}
* @param index
* offset to begin removing characters. Range: 0 to {@code str.length() - 1}
* @param count
* number of characters to remove. Must be non-negative.
*
* @return reference to a string with characters removed
*
* @throws NullPointerException
* if {@code str} is {@code null}
* @throws IllegalArgumentException
*
* - if {@code str} is empty
* - if {@code count < 0}
*
* @throws IndexOutOfBoundsException
*
* - if {@code index < 0}
* - if {@code index >= str.length()}
* - if {@code index + count > str.length()}
*
*
* @see StringArgs#checkIndexAndCount(CharSequence, int, int, String, String, String)
*/
@NotFullyTested
public static String removeByIndexAndCount(String str, int index, int count) {
StringArgs.checkIndexAndCount(str, index, count, "str", "index", "count");
if (0 == count) {
return str;
}
// Never zero because StringArgs.checkIndexAndCount(...) rejects empty strings.
final int len = str.length();
String newStr = "";
final int countBefore = index;
final int countAfter = len - (index + count);
if (0 != countBefore) {
newStr = str.substring(0, countBefore);
}
if (0 != countAfter) {
String after = str.substring(index + count);
newStr += after;
}
return newStr;
}
// /**
// * Insert characters into a string by creating a new string and copying characters from the
// * source strings. To be precise, using the term "insert" is a misnomer, as Java strings
// * cannot change size.
// *
// * If {@code count == 0}, the input string reference is returned. No copy is made.
// *
// * @param str a string reference
// * @param index offset to begin inserting characters. Range: 0 to {@code str.length()}
// * @param newText characters to insert
// * @return reference to a string with characters inserted
// * @throws NullPointerException if {@code str} or {@code newText} is {@code null}
// * @throws IllegalArgumentException if {@code index} is invalid
// */
// public static String insert(String str, int index, String newText) {
// StringArgs.checkInsertIndex(str, index, "str", "index");
// ObjectArgs.checkNotNull(newText, "newText");
//
// if (0 == newText.length()) {
// return str;
// }
//
// String newStr = "";
// final int len = str.length();
// final int countBefore = index;
// final int countAfter = len - index;
//
// if (0 != len && 0 != countBefore) {
// newStr = str.substring(0, countBefore);
// }
// newStr += newText;
// if (0 != len && 0 != countAfter) {
// String after = str.substring(index);
// newStr += after;
// }
// return newStr;
// }
//
// /**
// * This is a convenience method to combine {@link #remove(String, int, int)}
// * and {@link #insert(String, int, String)} where {@code count = 1}.
// */
// public static String replace(String str, int index, String newText) {
// final int count = 1;
// String newStr = replace(str, index, count, newText);
// return newStr;
// }
//
// /**
// * This is a convenience method to combine {@link #remove(String, int, int)}
// * and {@link #insert(String, int, String)}.
// */
// public static String replace(String str, int index, int count, String newText) {
// String newStr = remove(str, index, count);
// String newStr2 = insert(newStr, index, newText);
// return newStr2;
// }
/**
* Options for methods that process text, such as
* {@link StringUtils#addPrefixAndSuffixPerLine(String, String, String, TextProcessorOption...)}.
*
* @author Kevin Connor ARPE ([email protected])
*
* @see StringUtils#addPrefixPerLine(String, String, TextProcessorOption...)
* @see StringUtils#addSuffixPerLine(String, String, TextProcessorOption...)
* @see StringUtils#addPrefixAndSuffixPerLine(String, String, String, TextProcessorOption...)
*/
@FullyTested
public static enum TextProcessorOption {
/**
* Do not process the first line in a multiline block of text. The first line will be
* preserved, unchanged.
*/
SKIP_FIRST_LINE;
/**
* Converts this reference to a debugger-friendly string. {@link Enum#toString()} only
* returns {@link #name()}. This version embellishes the result with the full class name.
* If only the name is required, call {@link #name()} directly.
*
* Docs from {@link Enum#toString()}:
*
* {@inheritDoc}
*/
@Override
public String toString() {
String x = String.format(
"enum %s: [name(): %s]",
TextProcessorOption.class.getCanonicalName(),
name());
return x;
}
}
/**
* This is a convenience method to call
* {@link #addPrefixAndSuffixPerLine(String, String, String, TextProcessorOption...)} with
* {@code optLineSuffix} as {@code null}.
*/
@FullyTested
public static String addPrefixPerLine(
String textBlock,
String optLinePrefix,
TextProcessorOption... optionArr) {
String optLineSuffix = null;
String x = addPrefixAndSuffixPerLine(textBlock, optLinePrefix, optLineSuffix, optionArr);
return x;
}
/**
* This is a convenience method to call
* {@link #addPrefixAndSuffixPerLine(String, String, String, TextProcessorOption...)} with
* {@code optLinePrefix} as {@code null}.
*/
@FullyTested
public static String addSuffixPerLine(
String textBlock,
String optLineSuffix,
TextProcessorOption... optionArr) {
String optLinePrefix = null;
String x = addPrefixAndSuffixPerLine(textBlock, optLinePrefix, optLineSuffix, optionArr);
return x;
}
/**
* Creates a new string from a multiline block of text with additional prefix and/or suffix per
* line. This method can be used to programatically indent blocks of text.
*
* Example:
*
{@code String textBlock = String.format("This is a sample%nmultiline block%nof text.");}
*
(The format code {@code "%n"} is replaced by current system newline.)
*
* {@code
* This is a sample
* multiline block
* of text.
* }
*
* Calling this method with {@code optLinePrefix} as {@code ">"} and {@code optLineSuffix} as
* {@code "<"} will return:
*
* {@code
* >This is a sample<
* >multiline block<
* >of text.<
* }
*
*
* @param textBlock
*
* - text to process, usually a multiline block of text
* - text is split and joined using {@link #NEW_LINE}
* - if text ends with {@link #NEW_LINE}:
*
* - final "phantom" empty line is ignored during processing
* - result will also end with {@link #NEW_LINE}
*
* - must not be {@code null} or empty
*
* @param optLinePrefix
*
* - additional text to add to the beginning to each line in the multiline block of
* text
* - may be {@code null} or empty, but only if parameter {@code optLineSuffix} is not
* also {@code null} or empty
*
* @param optLineSuffix
*
* - additional text to add to the end to each line in the multiline block of text
* - may be {@code null} or empty, but only if parameter {@code optLinePrefix} is not
* also {@code null} or empty
*
* @param optionArr
*
* - zero or more values from enum {@link TextProcessorOption}
* - values must not be {@code null}
*
*
* @return new string with each line modified to have new prefix and/or suffix
*
* @throws NullPointerException
*
* - if {@code textBlock} is {@code null}
* - if any element of {@code optionArr} is {@code null}
*
* @throws IllegalArgumentException
*
* - if {@code textBlock} has length zero (empty)
* - if both {@code optLinePrefix} and {@code optLineSuffix} are {@code null} or empty
*
*
* @see #addPrefixPerLine(String, String, TextProcessorOption...)
* @see #addSuffixPerLine(String, String, TextProcessorOption...)
*/
@FullyTested
public static String addPrefixAndSuffixPerLine(
String textBlock,
String optLinePrefix,
String optLineSuffix,
TextProcessorOption... optionArr) {
StringArgs.checkNotEmpty(textBlock, "textBlock");
ArrayArgs.checkElementsNotNull(optionArr, "optionArr");
boolean hasLinePrefix = (null != optLinePrefix && !optLinePrefix.isEmpty());
boolean hasLineSuffix = (null != optLineSuffix && !optLineSuffix.isEmpty());
if (!hasLinePrefix && !hasLineSuffix) {
throw new IllegalArgumentException(
"Both arguments 'optLinePrefix' and 'optLineSuffix' are null or empty");
}
Iterable lineIter = Splitter.on(NEW_LINE).split(textBlock);
ArrayList lineList = new ArrayList();
Iterables.addAll(lineList, lineIter);
List optionList = Arrays.asList(optionArr);
int lineListSize = lineList.size();
int i = (optionList.contains(TextProcessorOption.SKIP_FIRST_LINE) ? 1 : 0);
for ( ; i < lineListSize; ++i) {
String line = lineList.get(i);
// Do not modify the last line if empty.
if (i < lineListSize - 1 || !line.isEmpty()) {
if (hasLinePrefix) {
line = optLinePrefix + line;
}
if (hasLineSuffix) {
line += optLineSuffix;
}
lineList.set(i, line);
}
}
String x = Joiner.on(NEW_LINE).join(lineList);
return x;
}
}