
org.jboss.pressgang.ccms.utils.common.StringUtilities Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pressgang-ccms-utils Show documentation
Show all versions of pressgang-ccms-utils Show documentation
A set of common utilities used within the JBoss PressGang CCMS project
The newest version!
/*
Copyright 2011-2014 Red Hat, Inc
This file is part of PressGang CCMS.
PressGang CCMS is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PressGang CCMS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with PressGang CCMS. If not, see <http://www.gnu.org/licenses/>.
*/
package org.jboss.pressgang.ccms.utils.common;
import java.io.UnsupportedEncodingException;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
public class StringUtilities {
private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s");
/** Upper-case Latin letters that {@code generateRandomString} draws its characters from. */
private static final char[] randomStringCharacters = new char[]{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
        'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'};

/**
 * Generates a random string made up of the upper-case letters A-Z.
 *
 * The "SHA1PRNG" generator is requested first; if the running JVM does not provide it the
 * platform default {@link SecureRandom} is used instead, so the method never returns null.
 *
 * @param length The number of characters to generate. Zero or a negative value yields an empty string.
 * @return A string of {@code length} random upper-case letters.
 */
public static String generateRandomString(final int length) {
    SecureRandom random;
    try {
        random = SecureRandom.getInstance("SHA1PRNG");
    } catch (final java.security.NoSuchAlgorithmException ex) {
        // SHA1PRNG is not guaranteed on every JVM; the default generator is an acceptable substitute.
        random = new SecureRandom();
    }

    // Math.max guards the builder capacity against a negative length.
    final StringBuilder text = new StringBuilder(Math.max(length, 0));
    for (int i = 0; i < length; i++) {
        text.append(randomStringCharacters[random.nextInt(randomStringCharacters.length)]);
    }
    return text.toString();
}
/**
 * Counts the space characters (U+0020 only) at the end of a string.
 *
 * @param input The string to inspect, may be null.
 * @return The number of trailing spaces, or 0 when the input is null or empty.
 */
public static int rtrimCount(final String input) {
    if (input == null || input.isEmpty()) {
        return 0;
    }

    // "end" is the exclusive index of the last non-space character, so the
    // difference to the full length is exactly the number of trailing spaces.
    int end = input.length();
    while (end > 0 && input.charAt(end - 1) == ' ') {
        --end;
    }
    return input.length() - end;
}
/**
 * Counts the space characters (U+0020 only) at the start of a string.
 *
 * @param input The string to inspect, may be null.
 * @return The number of leading spaces, or 0 when the input is null or empty.
 */
public static int ltrimCount(final String input) {
    if (input == null) {
        return 0;
    }

    final int length = input.length();
    int leading = 0;
    for (; leading < length; leading++) {
        if (input.charAt(leading) != ' ') {
            break;
        }
    }
    return leading;
}
/**
 * Removes the space characters (U+0020 only) from the end of a string.
 *
 * @param input The string to trim, may be null.
 * @return The input without its trailing spaces, or null when the input is null.
 */
public static String rtrim(final String input) {
    if (input == null || input.isEmpty()) {
        return input;
    }

    // Walk back from the end until a non-space character (or the start) is reached.
    int end = input.length();
    while (end > 0 && input.charAt(end - 1) == ' ') {
        end--;
    }
    return input.substring(0, end);
}
/**
 * Removes the space characters (U+0020 only) from the start of a string.
 *
 * @param input The string to trim, may be null.
 * @return The input without its leading spaces, or null when the input is null.
 */
public static String ltrim(final String input) {
    if (input == null || input.isEmpty()) {
        return input;
    }

    // Advance past every leading space; "start" ends on the first character to keep.
    final int length = input.length();
    int start = 0;
    while (start < length && input.charAt(start) == ' ') {
        start++;
    }
    return input.substring(start);
}
/**
 * Wraps a value in double quotes for use as a CSV field, doubling any double quote it contains.
 *
 * @param input The raw field value, may be null.
 * @return The quoted field, or an empty (unquoted) string when the input is null.
 */
public static String cleanTextForCSV(final String input) {
    if (input == null) {
        return "";
    }

    final String doubledQuotes = input.replace("\"", "\"\"");
    final StringBuilder field = new StringBuilder(doubledQuotes.length() + 2);
    field.append('"').append(doubledQuotes).append('"');
    return field.toString();
}
/**
 * Lower-cases the first character of a string and leaves the rest untouched.
 *
 * @param input The string to convert, may be null.
 * @return The converted string, an empty string for empty input, or null when the input is null.
 */
public static String uncapitaliseFirstCharacter(final String input) {
    if (input == null) {
        return null;
    }
    if (input.isEmpty()) {
        return "";
    }

    // substring(1) is empty for a one-character input, so no length check is needed.
    final String head = input.substring(0, 1).toLowerCase();
    final String tail = input.substring(1);
    return head + tail;
}
/**
* @param input The original string
* @return A string with special characters that break Publican stripped out
*/
public static String cleanTextForXML(final String input) {
return input.replaceAll("(\\u00C2)|(\\u00A0)", " ") // non breaking space (UTF-8)
.replaceAll("\\u00A0", " ") // non breaking space (ISO-8859-1 or US-ASCII)
.replaceAll("\\u00E9", "\u00C3\u00A9") // a lower case Latin e with acute
.replaceAll("(\\u2018)|(\\u2019)", "'") // left/right single quote
.replaceAll("(\\u201C)|(\\u201D)", """) // right/left double quote
.replaceAll("�", "-"); // a long dash
}
/**
* Prepares a string to be inserted into xml by escaping any reserved XML symbols.
*
* The current symbols are: < > & " '
*
* @param input The original string
* @return A string with the reserved xml characters escaped.
*/
public static String escapeForXML(final String input) {
return input.replace("&", "&")
.replace("<", "<")
.replace(">", ">")
.replace("\"", """)
.replace("'", "'");
}
/**
 * Prepares a string to be inserted into xml entity declaration by escaping any reserved XML symbols.
 *
 * The current symbols are: {@code < > & " ' %}
 *
 * @param input The original string, must not be null.
 * @return A string with the reserved xml characters escaped.
 */
public static String escapeForXMLEntity(final String input) {
    // Inside an entity value a bare percent sign starts a parameter entity reference,
    // so it is written as a numeric character reference. This runs after escapeForXML
    // so that the ampersand of the reference is not escaped a second time.
    return escapeForXML(input).replace("%", "&#37;");
}
/**
 * A utility function to allow us to build a single string with line breaks
 * from an array of strings. This is really just used to make defining text
 * files in code easier to read, as opposed to having to add and maintain
 * line breaks in the initial string definition.
 *
 * @param lines The lines to join, in order.
 * @return The lines joined with a single line feed between each pair.
 */
public static String buildString(final String[] lines) {
    return buildString(lines, "\n");
}

/**
 * Joins an array of strings, placing the separator between every pair of adjacent elements.
 *
 * Empty elements are kept, including at the start of the array, so the result always
 * contains exactly {@code lines.length - 1} separators for a non-empty array.
 *
 * @param lines     The strings to join, in order.
 * @param separator The text placed between adjacent elements.
 * @return The joined string, or an empty string when the array has no elements.
 */
public static String buildString(final String[] lines, final String separator) {
    final StringBuilder retValue = new StringBuilder();
    for (int i = 0; i < lines.length; i++) {
        // Decide by position rather than by builder length: a leading empty
        // element leaves the builder empty but still needs its separator.
        if (i > 0) {
            retValue.append(separator);
        }
        retValue.append(lines[i]);
    }
    return retValue.toString();
}
/**
 * Checks whether the first character of a string is whitespace.
 *
 * @param input The string to test, may be null.
 * @return true if the first character matches the whitespace pattern; false for null or empty input.
 */
public static boolean startsWithWhitespace(final String input) {
    if (input != null && !input.isEmpty()) {
        // Only the first character is handed to the matcher.
        final String firstCharacter = input.substring(0, 1);
        return WHITESPACE_PATTERN.matcher(firstCharacter).find();
    }
    return false;
}
/**
 * Checks whether the last character of a string is whitespace.
 *
 * @param input The string to test, may be null.
 * @return true if the last character matches the whitespace pattern; false for null or empty input.
 */
public static boolean endsWithWhitespace(final String input) {
    if (input != null && !input.isEmpty()) {
        // Only the last character is handed to the matcher.
        final String lastCharacter = input.substring(input.length() - 1);
        return WHITESPACE_PATTERN.matcher(lastCharacter).find();
    }
    return false;
}
/**
 * Removes every carriage return from a string, leaving line feeds as the only line terminators.
 *
 * @param input The text to convert, may be null.
 * @return The text without carriage returns, or an empty string when the input is null.
 */
public static String convertToLinuxLineEndings(final String input) {
    return input == null ? "" : input.replace("\r", "");
}
// NOTE(review): this region is incomplete in this copy of the file. The regex literal on the
// return line below is cut off mid-pattern and runs straight into what looks like the body of
// a different method (it uses `index`, `prevIndex`, `split` and `output`, none of which are
// declared here). The missing text has to be restored from the upstream source before this
// compiles; it should not be reconstructed by hand.
public static String convertToWindowsLineEndings(final String input) {
if (input == null) return "";
return input.replaceAll("(? output = new ArrayList();
// From here on the code collects the pieces of `input` that lie between successive positions
// returned by indexOf(input, split, ...) and returns them as an array - presumably the tail of
// a two-argument split(input, split) overload; confirm against upstream.
if (index == -1) {
output.add(input);
return output.toArray(new String[1]);
}
while (index != -1) {
output.add(input.substring(prevIndex, index));
prevIndex = index + 1;
index = indexOf(input, split, index + 1);
}
output.add(input.substring(prevIndex, input.length()));
return output.toArray(new String[output.size()]);
}
/**
 * Similar to the normal String split function. However this function ignores escaped characters (i.e. \[ ),
 * as determined by the indexOf helper used to locate each delimiter.
 *
 * The delimiters themselves are not included in the returned pieces. Splitting stops once
 * {@code limit} pieces would be produced, with the final piece holding the unsplit remainder;
 * a limit below 1 never stops the splitting early.
 *
 * @param input The string to be split
 * @param split The char to be used to split the input string
 * @param limit The maximum number of times to split the string
 * @return An array of split strings
 */
public static String[] split(final String input, final char split, final int limit) {
    int index = indexOf(input, split);
    if (index == -1) {
        // No delimiter at all: the input is returned as the only element.
        return new String[]{input};
    }

    // The element type must be spelled out: on a raw list toArray(String[]) is typed
    // Object[] and cannot be returned as String[].
    final ArrayList<String> output = new ArrayList<String>();
    int prevIndex = 0;
    int count = 1;
    while (index != -1 && count != limit) {
        output.add(input.substring(prevIndex, index));
        prevIndex = index + 1;
        index = indexOf(input, split, index + 1);
        count++;
    }

    // Whatever follows the last consumed delimiter becomes the final piece.
    output.add(input.substring(prevIndex));
    return output.toArray(new String[output.size()]);
}
/**
 * Checks whether every character of a string is a letter or a digit.
 *
 * @param input The string to be tested, must not be null.
 * @return True if the string contains only letters and digits (an empty string qualifies), otherwise false
 */
public static boolean isAlphanumeric(final String input) {
    for (final char character : input.toCharArray()) {
        if (!Character.isLetterOrDigit(character)) {
            return false;
        }
    }
    return true;
}
/**
 * Checks a string to see if it contains the Unicode replacement character (U+FFFD).
 *
 * @param input The string to be checked, must not be null.
 * @return True if the replacement character is found, otherwise false
 */
public static boolean hasInvalidUTF8Character(final String input) {
    final int replacementCharacter = 0xFFFD;
    return input.indexOf(replacementCharacter) != -1;
}
/**
 * Converts a string so that it can be used in a regular expression.
 *
 * Each regular expression metacharacter in the input (backslash, asterisk, plus, square
 * brackets, parentheses, braces, question mark, dollar, pipe, caret and dot) is prefixed
 * with a backslash so that it matches itself literally. All other characters are copied
 * unchanged.
 *
 * @param input The string to be converted, must not be null.
 * @return An escaped string that can be used in a regular expression.
 */
public static String convertToRegexString(final String input) {
    final String metaCharacters = "\\*+][()?$|^.{}";

    // Escaping is done character by character instead of through replaceAll: a replacement
    // string treats its own backslashes as escapes, which silently drops the one being added.
    final StringBuilder escaped = new StringBuilder(input.length() * 2);
    for (int i = 0; i < input.length(); i++) {
        final char character = input.charAt(i);
        if (metaCharacters.indexOf(character) != -1) {
            escaped.append('\\');
        }
        escaped.append(character);
    }
    return escaped.toString();
}
/**
 * Scores the similarity of two strings from their Levenshtein distance.
 *
 * The score is the length of the longer string minus the distance, divided by the length of
 * the longer string.
 *
 * @param s1 The first string to compare against.
 * @param s2 The second string to compare against.
 * @return A value between 0 and 1.0, where 1.0 is an exact match and 0 is no match at all.
 */
public static double similarLevenshtein(String s1, String s2) {
    if (s1.equals(s2)) {
        return 1.0;
    }

    // Work out which argument is the longer one without reassigning the parameters.
    final boolean firstIsShorter = s1.length() < s2.length();
    final String longer = firstIsShorter ? s2 : s1;
    final String shorter = firstIsShorter ? s1 : s2;

    final int longestLength = longer.length();
    final int distance = StringUtils.getLevenshteinDistance(shorter, longer);
    return (longestLength - distance) / (double) longestLength;
}
/**
 * Scores the similarity of two strings from their Damerau-Levenshtein distance.
 *
 * The score is the length of the longer string minus the distance, divided by the length of
 * the longer string.
 *
 * @param s1 The first string to compare against.
 * @param s2 The second string to compare against.
 * @return A value between 0 and 1.0, where 1.0 is an exact match and 0 is no match at all.
 */
public static double similarDamerauLevenshtein(String s1, String s2) {
    if (s1.equals(s2)) {
        return 1.0;
    }

    // Work out which argument is the longer one without reassigning the parameters.
    final boolean firstIsShorter = s1.length() < s2.length();
    final String longer = firstIsShorter ? s2 : s1;
    final String shorter = firstIsShorter ? s1 : s2;

    final int longestLength = longer.length();
    final int distance = getDamerauLevenshteinDistance(shorter, longer);
    return (longestLength - distance) / (double) longestLength;
}
/**
 * Get the minimum number of operations required to get from one string to another using the Damerau-Levenshtein distance algorithm
 *
 * Note: Java implementation of the C# algorithm from https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
 *
 * A null string is treated the same as an empty one.
 *
 * @param source The source string.
 * @param target The string to transform the source into.
 * @return The number of operations required to transform source into target.
 */
public static int getDamerauLevenshteinDistance(String source, String target) {
    final boolean sourceEmpty = source == null || source.isEmpty();
    final boolean targetEmpty = target == null || target.isEmpty();
    if (sourceEmpty) {
        return targetEmpty ? 0 : target.length();
    }
    if (targetEmpty) {
        return source.length();
    }

    final int sourceLength = source.length();
    final int targetLength = target.length();

    // The table carries one extra leading row and column filled with "infinity",
    // a value larger than any real distance, so that look-ups for a character
    // that has not been seen yet can never win the minimum below.
    final int[][] score = new int[sourceLength + 2][targetLength + 2];
    final int infinity = sourceLength + targetLength;
    score[0][0] = infinity;
    for (int i = 0; i <= sourceLength; i++) {
        score[i + 1][1] = i;
        score[i + 1][0] = infinity;
    }
    for (int j = 0; j <= targetLength; j++) {
        score[1][j + 1] = j;
        score[0][j + 1] = infinity;
    }

    // Last source row (1-based) in which each character was seen; 0 means "not yet".
    final SortedMap<Character, Integer> lastSeenRow = new TreeMap<Character, Integer>();
    for (final char letter : (source + target).toCharArray()) {
        if (!lastSeenRow.containsKey(letter)) {
            lastSeenRow.put(letter, 0);
        }
    }

    for (int i = 1; i <= sourceLength; i++) {
        // Last target column (1-based) in this row where the characters matched.
        int lastMatchColumn = 0;
        for (int j = 1; j <= targetLength; j++) {
            final int i1 = lastSeenRow.get(target.charAt(j - 1));
            final int j1 = lastMatchColumn;
            if (source.charAt(i - 1) == target.charAt(j - 1)) {
                score[i + 1][j + 1] = score[i][j];
                lastMatchColumn = j;
            } else {
                // Cheapest of substitution, insertion and deletion.
                score[i + 1][j + 1] = Math.min(score[i][j], Math.min(score[i + 1][j], score[i][j + 1])) + 1;
            }
            // Transposition: cost up to the earlier occurrence, plus the characters
            // skipped on either side, plus one for the swap itself.
            score[i + 1][j + 1] = Math.min(score[i + 1][j + 1], score[i1][j1] + (i - i1 - 1) + 1 + (j - j1 - 1));
        }
        lastSeenRow.put(source.charAt(i - 1), i);
    }
    return score[sourceLength + 1][targetLength + 1];
}
/**
 * Tests whether a String is null, empty, or made up only of characters that {@code trim()} removes.
 *
 * @param input The String to test
 * @return true if input is null or contains only whitespace, and false otherwise
 */
public static boolean isStringNullOrEmpty(final String input) {
    return input == null || input.trim().isEmpty();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy