// ma.vi.base.string.Strings - Maven / Gradle / Ivy
// Show all versions of com.vikmad.base - Show documentation
/*
* Copyright (c) 2016 Vikash Madhow
*/
package ma.vi.base.string;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.lang.Character.*;
/**
* Utility functions working on strings.
*
* @author Vikash Madhow ([email protected])
*/
public class Strings {
  /**
   * Alpha-numeric characters (digits followed by lower-case ASCII letters);
   * useful to generate random alpha-numeric strings. This array is the default
   * alphabet of {@link #random(int, char[])}; modifying it changes that default.
   */
  public static final char[] ALPHA_NUMERIC = "0123456789abcdefghijklmnopqrstuvwxyz".toCharArray();

  /**
   * Numeric characters; useful to generate random numeric strings.
   */
  public static final char[] NUMERIC = "0123456789".toCharArray();

  /**
   * Alphabetic characters (lower-case ASCII letters); useful to generate
   * random alphabetic strings.
   */
  public static final char[] ALPHABETIC = "abcdefghijklmnopqrstuvwxyz".toCharArray();

  /**
   * Regular expression to split a camel case identifier into separate words.
   * Alternatives are tried in order:
   * <ol>
   *   <li>a run of upper-case letters not followed by a lower-case letter
   *       (an acronym; the lookahead leaves the last capital to start the
   *       next word, so {@code XMLParser} yields {@code XML}, {@code Parser});</li>
   *   <li>one upper-case letter followed by lower-case letters;</li>
   *   <li>a run of lower-case letters;</li>
   *   <li>a run of digits.</li>
   * </ol>
   * Only ASCII letters and digits are matched; every other character acts as a
   * separator and is dropped.
   */
  public static final Pattern CASE_SPLITTER =
      Pattern.compile("[A-Z]+(?![a-z])|[A-Z][a-z]+|[a-z]+|[0-9]+");

  /**
   * Position of separators (1) in the textual form of a UUID; every other
   * position (0) must hold a hexadecimal digit.
   */
  private static final byte[] UUID_SEP = new byte[]{
      0, 0, 0, 0, 0, 0, 0, 0,
      1,
      0, 0, 0, 0,
      1,
      0, 0, 0, 0,
      1,
      0, 0, 0, 0,
      1,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  };

  /**
   * Maximum length applied by {@link #shorten(String)}.
   */
  private static final int DEFAULT_SHORTEN_LENGTH = 20;

  /**
   * Suffix appended by {@link #shorten(String)} when the text is cut.
   */
  private static final String DEFAULT_SHORTEN_SUFFIX = "...";

  /**
   * Length used by the {@code random} methods when a non-positive length is requested.
   */
  private static final int DEFAULT_RANDOM_LENGTH = 8;

  /**
   * Random number generator shared by the {@code random} methods.
   */
  private static final Random RANDOM = new Random();

  private Strings() {
  }

  /**
   * Shortens the text to at most 20 characters, appending "..." when it was cut.
   *
   * @param text The text to shorten; may be null.
   * @return The text unchanged if it is null or has at most 20 characters,
   *         otherwise its first 20 characters followed by "...".
   */
  public static String shorten(String text) {
    return shorten(text, DEFAULT_SHORTEN_LENGTH, DEFAULT_SHORTEN_SUFFIX);
  }

  /**
   * Shortens the text to at most {@code len} characters, appending the suffix
   * only when characters were actually removed.
   *
   * @param text   The text to shorten; may be null.
   * @param len    Maximum number of characters of text to keep.
   * @param suffix Marker appended after the kept characters when text is cut.
   * @return The text unchanged if it is null or no longer than {@code len},
   *         otherwise its first {@code len} characters followed by the suffix.
   * @throws StringIndexOutOfBoundsException if text is non-null and {@code len} is negative.
   */
  public static String shorten(String text, int len, String suffix) {
    // `<=`: a text of exactly `len` characters loses nothing and needs no suffix.
    if (text == null || text.length() <= len) {
      return text;
    }
    return text.substring(0, len) + suffix;
  }

  /**
   * Return true if string conforms to this regular expression:
   *
   * HEX{8}-HEX{4}-HEX{4}-HEX{4}-HEX{12}
   *
   * where HEX is [A-Fa-f0-9]
   *
   * I.e., it conforms to the hexadecimal representation of a UUID.
   *
   * @param text The text to check; null is never a UUID.
   * @return true if text has the UUID layout described above.
   */
  public static boolean isUUID(String text) {
    if (text == null || text.length() != UUID_SEP.length) {
      return false;
    }
    for (int i = 0; i < UUID_SEP.length; i++) {
      char c = text.charAt(i);
      boolean valid = UUID_SEP[i] == 1 ? c == '-' : isHexDigit(c);
      if (!valid) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns true if c is an ASCII hexadecimal digit ([0-9A-Fa-f]).
   */
  private static boolean isHexDigit(char c) {
    return (c >= '0' && c <= '9')
        || (c >= 'A' && c <= 'F')
        || (c >= 'a' && c <= 'f');
  }

  /**
   * Cleans the string by collapsing white spaces, removing leading and
   * trailing non-alphanumeric characters and converting it fully to lower case
   * except for the first character which is converted to upper case.
   * Lower-casing uses {@link String#toLowerCase()}, i.e. the default locale.
   *
   * @param value The string to clean; may be null.
   * @return The cleaned string, or null if value is null.
   */
  public static String clean(String value) {
    if (value == null) {
      return null;
    }
    String trimmed = trimPunctuations(collapseWhiteSpace(value));
    return capFirst(trimmed.toLowerCase());
  }

  /**
   * Remove leading and trailing whitespaces and reduce all consecutive
   * whitespaces to one (the first whitespace character of each run is kept).
   *
   * @param value The string to process; may be null.
   * @return The collapsed string, or null if value is null.
   */
  public static String collapseWhiteSpace(String value) {
    if (value == null) {
      return null;
    }
    StringBuilder collapsed = new StringBuilder(value.length());
    // Start as if a whitespace was just seen so that leading whitespace is dropped.
    boolean previousWasSpace = true;
    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      boolean space = isWhitespace(c);
      if (!(space && previousWasSpace)) {
        collapsed.append(c);
      }
      previousWasSpace = space;
    }
    // trim() removes characters <= ' ' at both ends as before; strip() additionally
    // removes a trailing Unicode whitespace (e.g. U+2003) which trim() leaves behind,
    // making trailing whitespace handling agree with the leading one.
    return collapsed.toString().trim().strip();
  }

  /**
   * Returns true if the string contains only lower-case letters
   * or non-alphabetic characters.
   *
   * @param s The string to check; null is considered lower-case.
   * @return false if s contains a letter which is not lower-case, true otherwise.
   */
  public static boolean isLower(String s) {
    return s == null
        || s.codePoints().noneMatch(cp -> isLetter(cp) && !isLowerCase(cp));
  }

  /**
   * Returns true if the string contains only upper-case letters
   * or non-alphabetic characters.
   *
   * @param s The string to check; null is considered upper-case,
   *          mirroring {@link #isLower(String)}.
   * @return false if s contains a letter which is not upper-case, true otherwise.
   */
  public static boolean isUpper(String s) {
    // Not simply !isLower(s): a mixed-case string is neither lower nor upper,
    // and a string without letters is both.
    return s == null
        || s.codePoints().noneMatch(cp -> isLetter(cp) && !isUpperCase(cp));
  }

  /**
   * Strip the string of anything but letters. Letters are recognised per
   * Unicode code point, so letters outside the basic multilingual plane are kept.
   *
   * @param s The string to filter; may be null.
   * @return The letters of s in their original order, or null if s is null.
   */
  public static String lettersOnly(String s) {
    if (s == null) {
      return null;
    }
    return s.codePoints()
        .filter(Character::isLetter)
        .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
        .toString();
  }

  /**
   * Remove all leading non-alphanumeric characters.
   *
   * @param value The string to trim; may be null.
   * @return value starting at its first letter or digit (empty if it has none),
   *         or null if value is null.
   */
  public static String ltrimPunctuations(String value) {
    if (value == null) {
      return null;
    }
    int start = 0;
    while (start < value.length()) {
      int cp = value.codePointAt(start);
      if (isLetterOrDigit(cp)) {
        break;
      }
      // Advance by whole code points so surrogate pairs are never split.
      start += charCount(cp);
    }
    return value.substring(start);
  }

  /**
   * Remove all trailing non-alphanumeric characters.
   *
   * @param value The string to trim; may be null.
   * @return value ending at its last letter or digit (empty if it has none),
   *         or null if value is null.
   */
  public static String rtrimPunctuations(String value) {
    if (value == null) {
      return null;
    }
    int end = value.length();
    while (end > 0) {
      int cp = value.codePointBefore(end);
      if (isLetterOrDigit(cp)) {
        break;
      }
      // Step back by whole code points so surrogate pairs are never split.
      end -= charCount(cp);
    }
    return value.substring(0, end);
  }

  /**
   * Remove all leading and trailing non-alphanumeric characters.
   *
   * @param value The string to trim; may be null.
   * @return The trimmed string, or null if value is null.
   */
  public static String trimPunctuations(String value) {
    return ltrimPunctuations(rtrimPunctuations(value));
  }

  /**
   * Replace all characters with accents in value with their equivalent
   * characters with the accent removed. The value is decomposed (NFD) and all
   * non-spacing marks are dropped; the result is left in decomposed form.
   *
   * @param value The string to process; may be null.
   * @return The string without diacritics, or null if value is null.
   */
  public static String removeDiacritics(String value) {
    if (value == null) {
      return null;
    }
    return Normalizer.normalize(value, Normalizer.Form.NFD)
        .codePoints()
        .filter(cp -> getType(cp) != NON_SPACING_MARK)
        .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
        .toString();
  }

  /**
   * Remove from the string any continuous sequence which does not match
   * the supplied regular expression.
   *
   * @param text            The text to preserve substrings from.
   * @param preservePattern Only sequences in text matching this pattern will
   *                        be preserved in the result.
   * @return The concatenation of all matches in order of appearance, or null
   *         if text or preservePattern is null.
   */
  public static String preserve(String text, Pattern preservePattern) {
    if (text == null || preservePattern == null) {
      return null;
    }
    StringBuilder result = new StringBuilder();
    Matcher matcher = preservePattern.matcher(text);
    while (matcher.find()) {
      // Append the matched region directly; avoids a temporary substring.
      result.append(text, matcher.start(), matcher.end());
    }
    return result.toString();
  }

  /**
   * Creates a label by expanding a camel case identifier: the words found by
   * {@link #splitByCase(String)} are joined with single spaces.
   *
   * @param text The identifier to expand.
   * @return The words of the identifier separated by spaces.
   * @throws IllegalArgumentException if text is null.
   */
  public static String expandByCase(String text) {
    return String.join(" ", splitByCase(text));
  }

  /**
   * Splits a string by case boundaries. E.g., AnotherCasePhrase is split
   * to ['Another', 'Case', 'Phrase'] and XMLParser to ['XML', 'Parser'].
   * Characters not matched by {@link #CASE_SPLITTER} are dropped.
   *
   * @param text The text to split.
   * @return A new mutable list of the words found, in order of appearance.
   * @throws IllegalArgumentException if text is null.
   */
  public static List<String> splitByCase(String text) {
    if (text == null) {
      throw new IllegalArgumentException("Can't split null");
    }
    List<String> result = new ArrayList<>();
    Matcher matcher = CASE_SPLITTER.matcher(text);
    while (matcher.find()) {
      result.add(matcher.group());
    }
    return result;
  }

  /**
   * Capitalize first character only. The locale-independent
   * {@link Character#toUpperCase(int)} mapping is used for strings of any length.
   *
   * @param text The text to capitalize; may be null.
   * @return text with its first character in upper case; null or empty text
   *         is returned unchanged.
   */
  public static String capFirst(String text) {
    if (text == null || text.isEmpty()) {
      return text;
    }
    int first = text.codePointAt(0);
    return replaceFirstCodePoint(text, first, Character.toUpperCase(first));
  }

  /**
   * De-capitalize first character only. The locale-independent
   * {@link Character#toLowerCase(int)} mapping is used for strings of any length.
   *
   * @param text The text to de-capitalize; may be null.
   * @return text with its first character in lower case; null or empty text
   *         is returned unchanged.
   */
  public static String uncapFirst(String text) {
    if (text == null || text.isEmpty()) {
      return text;
    }
    int first = text.codePointAt(0);
    return replaceFirstCodePoint(text, first, Character.toLowerCase(first));
  }

  /**
   * Returns text with its leading code point {@code original} replaced by
   * {@code replacement}; text itself is returned when both are the same.
   */
  private static String replaceFirstCodePoint(String text, int original, int replacement) {
    if (original == replacement) {
      return text;
    }
    return new StringBuilder(text.length())
        .appendCodePoint(replacement)
        .append(text, charCount(original), text.length())
        .toString();
  }

  /**
   * Returns a string of length characters chosen randomly from the supplied
   * character array.
   *
   * @param length Number of characters to generate; 8 is used if this is not positive.
   * @param chars  Characters to choose from; {@link #ALPHA_NUMERIC} is used if
   *               this is null or empty.
   * @return The generated string.
   */
  public static String random(int length, char[] chars) {
    int size = length <= 0 ? DEFAULT_RANDOM_LENGTH : length;
    char[] alphabet = chars == null || chars.length == 0 ? ALPHA_NUMERIC : chars;
    StringBuilder str = new StringBuilder(size);
    for (int i = 0; i < size; i++) {
      str.append(alphabet[RANDOM.nextInt(alphabet.length)]);
    }
    return str.toString();
  }

  /**
   * Returns a random string of 8 characters taken from {@link #ALPHA_NUMERIC}.
   */
  public static String random() {
    return random(0, null);
  }

  /**
   * Returns a random string of the given length taken from {@link #ALPHA_NUMERIC}.
   *
   * @param length Number of characters to generate; 8 is used if this is not positive.
   * @return The generated string.
   */
  public static String random(int length) {
    return random(length, null);
  }

  /**
   * Change a name by adding a random substring until it is unique
   * within the set of names provided. If the name has an optional
   * suffix following a period (i.e. an extension), the random string
   * is added before the extension. If the name is not present in
   * the set of names initially, it is returned unchanged. The
   * unique name is added to the set before returning.
   *
   * @param names Set of existing names; it is modified by this call.
   * @param name  Name to make unique.
   * @return A unique name as per the set of names.
   * @throws NullPointerException if names or name is null.
   */
  public static String makeUnique(Set<String> names, String name) {
    int dot = name.lastIndexOf('.');
    String base = dot == -1 ? name : name.substring(0, dot);
    String extension = dot == -1 ? "" : name.substring(dot);
    String candidate = name;
    // Each attempt derives from the original base name, so suffixes never pile up.
    while (names.contains(candidate)) {
      candidate = base + "_" + random(4) + extension;
    }
    names.add(candidate);
    return candidate;
  }
}