// org.drools.core.util.StringUtils Maven / Gradle / Ivy
/*
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.drools.core.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.UUID;
import static java.lang.Character.isWhitespace;
/**
 * Ripped from commons StringUtil, unless specified:
 *
 * <p>Operations on {@link java.lang.String} that are
 * <code>null</code> safe.</p>
 *
 * <ul>
 *  <li><b>IsEmpty/IsBlank</b>
 *      - checks if a String contains text</li>
 *  <li><b>Trim/Strip</b>
 *      - removes leading and trailing whitespace</li>
 *  <li><b>Equals</b>
 *      - compares two strings null-safe</li>
 *  <li><b>IndexOf/LastIndexOf/Contains</b>
 *      - null-safe index-of checks</li>
 *  <li><b>IndexOfAny/LastIndexOfAny/IndexOfAnyBut/LastIndexOfAnyBut</b>
 *      - index-of any of a set of Strings</li>
 *  <li><b>ContainsOnly/ContainsNone</b>
 *      - does String contains only/none of these characters</li>
 *  <li><b>Substring/Left/Right/Mid</b>
 *      - null-safe substring extractions</li>
 *  <li><b>SubstringBefore/SubstringAfter/SubstringBetween</b>
 *      - substring extraction relative to other strings</li>
 *  <li><b>Split/Join</b>
 *      - splits a String into an array of substrings and vice versa</li>
 *  <li><b>Remove/Delete</b>
 *      - removes part of a String</li>
 *  <li><b>Replace/Overlay</b>
 *      - Searches a String and replaces one String with another</li>
 *  <li><b>Chomp/Chop</b>
 *      - removes the last part of a String</li>
 *  <li><b>LeftPad/RightPad/Center/Repeat</b>
 *      - pads a String</li>
 *  <li><b>UpperCase/LowerCase/SwapCase/Capitalize/Uncapitalize</b>
 *      - changes the case of a String</li>
 *  <li><b>CountMatches</b>
 *      - counts the number of occurrences of one String in another</li>
 *  <li><b>IsAlpha/IsNumeric/IsWhitespace/IsAsciiPrintable</b>
 *      - checks the characters in a String</li>
 *  <li><b>DefaultString</b>
 *      - protects against a null input String</li>
 *  <li><b>Reverse/ReverseDelimited</b>
 *      - reverses a String</li>
 *  <li><b>Abbreviate</b>
 *      - abbreviates a string using ellipsis</li>
 *  <li><b>Difference</b>
 *      - compares two Strings and reports on their differences</li>
 *  <li><b>LevensteinDistance</b>
 *      - the number of changes needed to change one String into another</li>
 * </ul>
 *
 * <p>The <code>StringUtils</code> class defines certain words related to
 * String handling.</p>
 *
 * <ul>
 *  <li>null - <code>null</code></li>
 *  <li>empty - a zero-length string (<code>""</code>)</li>
 *  <li>space - the space character (<code>' '</code>, char 32)</li>
 *  <li>whitespace - the characters defined by {@link Character#isWhitespace(char)}</li>
 *  <li>trim - the characters &lt;= 32 as in {@link String#trim()}</li>
 * </ul>
 *
 * <p><code>StringUtils</code> handles <code>null</code> input Strings quietly.
 * That is to say that a <code>null</code> input will return <code>null</code>.
 * Where a <code>boolean</code> or <code>int</code> is being returned
 * details vary by method.</p>
 *
 * <p>A side effect of the <code>null</code> handling is that a
 * <code>NullPointerException</code> should be considered a bug in
 * <code>StringUtils</code> (except for deprecated methods).</p>
 *
 * <p>Methods in this class give sample code to explain their operation.
 * The symbol <code>*</code> is used to indicate any input including
 * <code>null</code>.</p>
 *
 * @see java.lang.String
 * @since 1.0
 * @version $Id$
 */
public class StringUtils {

    /**
     * An empty immutable <code>String</code> array.
     */
    public static final String[] EMPTY_STRING_ARRAY = new String[0];

    // Performance testing notes (JDK 1.4, Jul03, scolebourne)
    // Whitespace:
    // Character.isWhitespace() is faster than WHITESPACE.indexOf()
    // where WHITESPACE is a string of all whitespace characters
    //
    // Character access:
    // String.charAt(n) versus toCharArray(), then array[n]
    // String.charAt(n) is about 15% worse for a 10K string
    // They are about equal for a length 50 string
    // String.charAt(n) is about 4 times better for a length 3 string
    // String.charAt(n) is best bet overall
    //
    // Append:
    // String.concat about twice as fast as StringBuilder.append
    // (not sure who tested this)

    /**
     * The empty String <code>""</code>.
     * @since 2.0
     */
    public static final String EMPTY = "";

    /**
     * Represents a failed index search.
     * @since 2.1
     */
    public static final int INDEX_NOT_FOUND = -1;

    /**
     * The maximum size to which the padding constant(s) can expand.
     */
    private static final int PAD_LIMIT = 8192;

    /**
     * <code>StringUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>StringUtils.trim(" foo ");</code>.
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public StringUtils() {
        super();
    }

    /**
     * Returns the given String with its first character converted to upper case.
     *
     * <pre>
     * StringUtils.ucFirst(null)  = null
     * StringUtils.ucFirst("")    = ""
     * StringUtils.ucFirst("foo") = "Foo"
     * </pre>
     *
     * @param name the String whose first character is upper-cased, may be null
     * @return the String with an upper-case first character; a null or empty
     *         input is returned unchanged
     */
    public static String ucFirst(final String name) {
        if ( name == null || name.length() == 0 ) {
            return name;
        }
        // Only the first char needs converting; the remainder is copied as-is.
        return Character.toUpperCase( name.charAt( 0 ) ) + name.substring( 1 );
    }

    // Empty checks
    //-----------------------------------------------------------------------
    /**
     * Checks if a CharSequence is null, empty ("") or made only of whitespace
     * as defined by {@link Character#isWhitespace(char)}.
     *
     * <pre>
     * StringUtils.isEmpty(null)    = true
     * StringUtils.isEmpty("")      = true
     * StringUtils.isEmpty(" ")     = true
     * StringUtils.isEmpty("bob")   = false
     * StringUtils.isEmpty(" bob ") = false
     * </pre>
     *
     * <p>NOTE: despite its name this method behaves like a "blank" check:
     * whitespace-only input is reported as empty.</p>
     *
     * @param str the CharSequence to check, may be null
     * @return <code>true</code> if the input is null, empty or whitespace only
     */
    public static boolean isEmpty(final CharSequence str) {
        if ( str == null || str.length() == 0 ) {
            return true;
        }
        for ( int i = 0, length = str.length(); i < length; i++ ) {
            if ( !isWhitespace( str.charAt( i ) ) ) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks that a String is neither null nor of length zero. Unlike
     * {@link #isEmpty(CharSequence)} a whitespace-only String has length.
     */
    private static boolean hasLength(final String str) {
        return str != null && str.length() > 0;
    }

    // Padding
    //-----------------------------------------------------------------------
    /**
     * Repeat a String <code>repeat</code> times to form a new String.
     *
     * <pre>
     * StringUtils.repeat(null, 2) = null
     * StringUtils.repeat("", 0)   = ""
     * StringUtils.repeat("", 2)   = ""
     * StringUtils.repeat("a", 3)  = "aaa"
     * StringUtils.repeat("ab", 2) = "abab"
     * StringUtils.repeat("a", -2) = ""
     * </pre>
     *
     * @param str the String to repeat, may be null
     * @param repeat number of times to repeat str, negative treated as zero
     * @return a new String consisting of the original String repeated,
     *         <code>null</code> if null String input
     */
    public static String repeat(final String str,
                                final int repeat) {
        // Performance tuned for 2.0 (JDK1.4)
        if ( str == null ) {
            return null;
        }
        if ( repeat <= 0 ) {
            return EMPTY;
        }
        final int inputLength = str.length();
        if ( repeat == 1 || inputLength == 0 ) {
            return str;
        }
        if ( inputLength == 1 && repeat <= PAD_LIMIT ) {
            return padding( repeat,
                            str.charAt( 0 ) );
        }

        final int outputLength = inputLength * repeat;
        switch ( inputLength ) {
            case 1 :
                // only reached for single characters repeated more than PAD_LIMIT times
                final char ch = str.charAt( 0 );
                final char[] output1 = new char[outputLength];
                for ( int i = repeat - 1; i >= 0; i-- ) {
                    output1[i] = ch;
                }
                return new String( output1 );
            case 2 :
                final char ch0 = str.charAt( 0 );
                final char ch1 = str.charAt( 1 );
                final char[] output2 = new char[outputLength];
                for ( int i = repeat * 2 - 2; i >= 0; i--, i-- ) {
                    output2[i] = ch0;
                    output2[i + 1] = ch1;
                }
                return new String( output2 );
            default :
                final StringBuilder buf = new StringBuilder( outputLength );
                for ( int i = 0; i < repeat; i++ ) {
                    buf.append( str );
                }
                return buf.toString();
        }
    }

    /**
     * Splits the provided text into an array, separators specified,
     * preserving all tokens, including empty tokens created by adjacent
     * separators. This is an alternative to using StringTokenizer.
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separatorChars splits on whitespace.</p>
     *
     * <pre>
     * StringUtils.splitPreserveAllTokens(null, *)           = null
     * StringUtils.splitPreserveAllTokens("", *)             = []
     * StringUtils.splitPreserveAllTokens("abc def", null)   = ["abc", "def"]
     * StringUtils.splitPreserveAllTokens("abc def", " ")    = ["abc", "def"]
     * StringUtils.splitPreserveAllTokens("abc  def", " ")   = ["abc", "", "def"]
     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":")   = ["ab", "cd", "ef"]
     * StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":")  = ["ab", "cd", "ef", ""]
     * StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""]
     * StringUtils.splitPreserveAllTokens("ab::cd:ef", ":")  = ["ab", "", "cd", "ef"]
     * StringUtils.splitPreserveAllTokens(":cd:ef", ":")     = ["", "cd", "ef"]
     * StringUtils.splitPreserveAllTokens("::cd:ef", ":")    = ["", "", "cd", "ef"]
     * StringUtils.splitPreserveAllTokens(":cd:ef:", ":")    = ["", "cd", "ef", ""]
     * </pre>
     *
     * @param str the String to parse, may be null
     * @param separatorChars the characters used as the delimiters,
     *        <code>null</code> splits on whitespace
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(final String str,
                                                  final String separatorChars) {
        return splitWorker( str,
                            separatorChars,
                            -1,
                            true );
    }

    /**
     * Performs the logic for the <code>split</code> and
     * <code>splitPreserveAllTokens</code> methods that return a maximum array
     * length.
     *
     * @param str the String to parse, may be null
     * @param separatorChars the separator characters, <code>null</code> means whitespace
     * @param max the maximum number of elements to include in the
     *        array. A zero or negative value implies no limit.
     * @param preserveAllTokens if <code>true</code>, adjacent separators are
     *        treated as empty token separators; if <code>false</code>, adjacent
     *        separators are treated as one separator.
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    private static String[] splitWorker(final String str,
                                        final String separatorChars,
                                        final int max,
                                        final boolean preserveAllTokens) {
        // Direct code is quicker than StringTokenizer.
        // Also, StringTokenizer uses isSpace() not isWhitespace()
        if ( str == null ) {
            return null;
        }
        final int len = str.length();
        if ( len == 0 ) {
            return EMPTY_STRING_ARRAY;
        }

        final List<String> list = new ArrayList<String>();
        int sizePlus1 = 1;
        int i = 0, start = 0;
        boolean match = false;      // a non-separator char was seen since the last emitted token
        boolean lastMatch = false;  // the previous char was a separator that emitted a token
        while ( i < len ) {
            if ( isSeparator( str.charAt( i ), separatorChars ) ) {
                if ( match || preserveAllTokens ) {
                    lastMatch = true;
                    if ( sizePlus1++ == max ) {
                        // limit reached: the last token takes the rest of the input
                        i = len;
                        lastMatch = false;
                    }
                    list.add( str.substring( start,
                                             i ) );
                    match = false;
                }
                start = ++i;
                continue;
            }
            lastMatch = false;
            match = true;
            i++;
        }
        if ( match || (preserveAllTokens && lastMatch) ) {
            list.add( str.substring( start,
                                     i ) );
        }
        return list.toArray( new String[list.size()] );
    }

    /**
     * Tells whether a character is a separator for {@code splitWorker}:
     * whitespace when <code>separatorChars</code> is null, otherwise any
     * character contained in <code>separatorChars</code>.
     */
    private static boolean isSeparator(final char c,
                                       final String separatorChars) {
        if ( separatorChars == null ) {
            return isWhitespace( c );
        }
        return separatorChars.indexOf( c ) >= 0;
    }

    /**
     * Returns padding using the specified delimiter repeated
     * to a given length.
     *
     * <pre>
     * StringUtils.padding(0, 'e')  = ""
     * StringUtils.padding(3, 'e')  = "eee"
     * StringUtils.padding(-2, 'e') = IndexOutOfBoundsException
     * </pre>
     *
     * <p>Note: this method does not support padding with
     * Unicode Supplementary Characters
     * as they require a pair of <code>char</code>s to be represented.
     * If you are needing to support full I18N of your applications
     * consider using {@link #repeat(String, int)} instead.</p>
     *
     * @param repeat number of times to repeat delim
     * @param padChar character to repeat
     * @return String with repeated character
     * @throws IndexOutOfBoundsException if <code>repeat &lt; 0</code>
     * @see #repeat(String, int)
     */
    public static String padding(final int repeat,
                                 final char padChar) throws IndexOutOfBoundsException {
        if ( repeat < 0 ) {
            throw new IndexOutOfBoundsException( "Cannot pad a negative amount: " + repeat );
        }
        final char[] buf = new char[repeat];
        for ( int i = 0; i < buf.length; i++ ) {
            buf[i] = padChar;
        }
        return new String( buf );
    }

    /**
     * Reads everything the given Reader provides into a String and closes the
     * Reader, also when reading fails.
     *
     * @param reader the Reader to drain, must not be null
     * @return the whole content read from the Reader
     * @throws RuntimeException wrapping any IOException raised while reading or closing
     */
    public static String readFileAsString(Reader reader) {
        try {
            try {
                StringBuilder fileData = new StringBuilder( 1000 );
                char[] buf = new char[1024];
                int numRead;
                while ( (numRead = reader.read( buf )) != -1 ) {
                    fileData.append( buf,
                                     0,
                                     numRead );
                }
                return fileData.toString();
            } finally {
                reader.close();
            }
        } catch ( IOException e ) {
            throw new RuntimeException( e );
        }
    }

    /**
     * Unescapes any Java literals found in the <code>String</code>.
     * For example, it will turn a sequence of <code>'\'</code> and
     * <code>'n'</code> into a newline character, unless the <code>'\'</code>
     * is preceded by another <code>'\'</code>.
     *
     * @param str the <code>String</code> to unescape, may be null
     * @return a new unescaped <code>String</code>, <code>null</code> if null string input
     */
    public static String unescapeJava(String str) {
        if (str == null) {
            return null;
        }
        try {
            StringWriter writer = new StringWriter(str.length());
            unescapeJava(writer, str);
            return writer.toString();
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new RuntimeException(ioe);
        }
    }

    /**
     * Unescapes any Java literals found in the <code>String</code> to a
     * <code>Writer</code>.
     *
     * <p>For example, it will turn a sequence of <code>'\'</code> and
     * <code>'n'</code> into a newline character, unless the <code>'\'</code>
     * is preceded by another <code>'\'</code>.</p>
     *
     * <p>A <code>null</code> string input has no effect. A backslash at the very
     * end of the input is written out as is. NOTE: a unicode escape that is cut
     * short by the end of the input (fewer than four hex digits) produces no
     * output at all.</p>
     *
     * @param out the <code>Writer</code> used to output unescaped characters
     * @param str the <code>String</code> to unescape, may be null
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if error occurs on underlying Writer
     * @throws RuntimeException if a unicode escape does not hold four hex digits
     */
    public static void unescapeJava(Writer out, String str) throws IOException {
        if (out == null) {
            throw new IllegalArgumentException("The Writer must not be null");
        }
        if (str == null) {
            return;
        }
        int sz = str.length();
        StringBuilder unicode = new StringBuilder(4);
        boolean hadSlash = false;
        boolean inUnicode = false;
        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);
            if (inUnicode) {
                // collecting the four hex digits of a unicode escape
                unicode.append(ch);
                if (unicode.length() == 4) {
                    // unicode now contains the four hex digits
                    // which represents our unicode character
                    try {
                        int value = Integer.parseInt(unicode.toString(), 16);
                        out.write((char) value);
                        unicode.setLength(0);
                        inUnicode = false;
                        hadSlash = false;
                    } catch (NumberFormatException nfe) {
                        throw new RuntimeException("Unable to parse unicode value: " + unicode, nfe);
                    }
                }
                continue;
            }
            if (hadSlash) {
                // handle an escaped value
                hadSlash = false;
                switch (ch) {
                    case '\\':
                        out.write('\\');
                        break;
                    case '\'':
                        out.write('\'');
                        break;
                    case '\"':
                        out.write("\"");
                        break;
                    case 'r':
                        out.write('\r');
                        break;
                    case 'f':
                        out.write('\f');
                        break;
                    case 't':
                        out.write('\t');
                        break;
                    case 'n':
                        out.write('\n');
                        break;
                    case 'b':
                        out.write('\b');
                        break;
                    case 'u':
                        {
                            // uh-oh, we're in unicode country....
                            inUnicode = true;
                            break;
                        }
                    default :
                        // unknown escape: the backslash is dropped, the char is kept
                        out.write(ch);
                        break;
                }
                continue;
            } else if (ch == '\\') {
                hadSlash = true;
                continue;
            }
            out.write(ch);
        }
        if (hadSlash) {
            // then we're in the weird case of a \ at the end of the
            // string, let's output it anyway.
            out.write('\\');
        }
    }

    private static final String FOLDER_SEPARATOR = "/";
    private static final String WINDOWS_FOLDER_SEPARATOR = "\\";
    private static final String TOP_PATH = "..";
    private static final String CURRENT_PATH = ".";
    private static final char EXTENSION_SEPARATOR = '.';

    /**
     * Normalize the path by suppressing sequences like "path/.." and
     * inner simple dots.
     * <p>The result is convenient for path comparison. For other uses,
     * notice that Windows separators ("\") are replaced by simple slashes.</p>
     * @param path the original path
     * @return the normalized path, <code>null</code> if the path was null
     *
     * Borrowed from Spring, under the ASL2.0 license.
     */
    public static String cleanPath(String path) {
        if (path == null) {
            return null;
        }
        String pathToUse = replace(path, WINDOWS_FOLDER_SEPARATOR, FOLDER_SEPARATOR);

        // Strip prefix from path to analyze, to not treat it as part of the
        // first path element. This is necessary to correctly parse paths like
        // "file:core/../core/io/Resource.class", where the ".." should just
        // strip the first "core" directory while keeping the "file:" prefix.
        int prefixIndex = pathToUse.indexOf(":");
        String prefix = "";
        if (prefixIndex != -1) {
            prefix = pathToUse.substring(0, prefixIndex + 1);
            pathToUse = pathToUse.substring(prefixIndex + 1);
        }
        if (pathToUse.startsWith(FOLDER_SEPARATOR)) {
            prefix = prefix + FOLDER_SEPARATOR;
            pathToUse = pathToUse.substring(1);
        }

        String[] pathArray = delimitedListToStringArray(pathToUse, FOLDER_SEPARATOR);
        List<String> pathElements = new LinkedList<String>();
        int tops = 0;

        // Walk from the last element to the first so that each ".." can cancel
        // the element that precedes it.
        for (int i = pathArray.length - 1; i >= 0; i--) {
            String element = pathArray[i];
            if (CURRENT_PATH.equals(element)) {
                // Points to current directory - drop it.
            }
            else if (TOP_PATH.equals(element)) {
                // Registering top path found.
                tops++;
            }
            else {
                if (tops > 0) {
                    // Merging path element with element corresponding to top path.
                    tops--;
                }
                else {
                    // Normal path element found.
                    pathElements.add(0, element);
                }
            }
        }

        // Remaining top paths need to be retained.
        for (int i = 0; i < tops; i++) {
            pathElements.add(0, TOP_PATH);
        }

        return prefix + collectionToDelimitedString(pathElements, FOLDER_SEPARATOR);
    }

    /**
     * Convenience method to return a Collection as a delimited (e.g. CSV)
     * String. E.g. useful for <code>toString()</code> implementations.
     * @param coll the Collection to display
     * @param delim the delimiter to use (probably a ",")
     * @param prefix the String to start each element with
     * @param suffix the String to end each element with
     * @return the delimited String, "" for a null or empty Collection
     *
     * Borrowed from Spring, under the ASL2.0 license.
     */
    public static String collectionToDelimitedString(Collection<?> coll, String delim, String prefix, String suffix) {
        if (coll == null || coll.isEmpty()) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        Iterator<?> it = coll.iterator();
        while (it.hasNext()) {
            sb.append(prefix).append(it.next()).append(suffix);
            if (it.hasNext()) {
                sb.append(delim);
            }
        }
        return sb.toString();
    }

    /**
     * Convenience method to return a Collection as a delimited (e.g. CSV)
     * String. E.g. useful for <code>toString()</code> implementations.
     * @param coll the Collection to display
     * @param delim the delimiter to use (probably a ",")
     * @return the delimited String, "" for a null or empty Collection
     *
     * Borrowed from Spring, under the ASL2.0 license.
     */
    public static String collectionToDelimitedString(Collection<?> coll, String delim) {
        return collectionToDelimitedString(coll, delim, "", "");
    }

    /**
     * Replace all occurrences of a substring within a string with
     * another string.
     *
     * <p>The input is returned unchanged when it is null or of length zero, when
     * the pattern is null or of length zero, or when the replacement is null.
     * Whitespace is significant: a pattern such as " " is replaced like any other.</p>
     *
     * @param inString String to examine
     * @param oldPattern String to replace
     * @param newPattern String to insert
     * @return a String with the replacements
     *
     * Borrowed from Spring, under the ASL2.0 license.
     */
    public static String replace(String inString, String oldPattern, String newPattern) {
        // Length checks, not isEmpty(): isEmpty() treats whitespace-only strings as
        // empty, which would make replacing " " a silent no-op.
        if (!hasLength(inString) || !hasLength(oldPattern) || newPattern == null) {
            return inString;
        }
        StringBuilder sbuf = new StringBuilder();
        // output StringBuilder we'll build up
        int pos = 0; // our position in the old string
        int index = inString.indexOf(oldPattern);
        // the index of an occurrence we've found, or -1
        int patLen = oldPattern.length();
        while (index >= 0) {
            sbuf.append(inString.substring(pos, index));
            sbuf.append(newPattern);
            pos = index + patLen;
            index = inString.indexOf(oldPattern, pos);
        }
        sbuf.append(inString.substring(pos));
        // remember to append any characters to the right of a match
        return sbuf.toString();
    }

    /**
     * Builds a URI from a location String after replacing every space
     * character with "%20".
     *
     * @param location the location to convert
     * @return the resulting URI
     * @throws URISyntaxException if the location, once spaces are encoded, is not a valid URI
     */
    public static URI toURI(String location) throws URISyntaxException {
        return new URI( replace(location, " ", "%20") );
    }

    /**
     * Escapes a String for inclusion in XML: the characters ", &amp;, &lt; and
     * &gt; are replaced by their named entities and every character with a
     * code of 160 or above is replaced by a decimal character reference.
     * All other characters are copied unchanged.
     *
     * @param string the String to escape, must not be null
     * @return the escaped String
     */
    public static String escapeXmlString(String string) {
        final int len = string.length();
        StringBuilder sb = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            char c = string.charAt(i);
            switch (c) {
                case '"':
                    sb.append("&quot;");
                    break;
                case '&':
                    sb.append("&amp;");
                    break;
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                default:
                    if (c < 160) {
                        // nothing special, copied as is
                        sb.append(c);
                    } else {
                        // emitted as a decimal character reference
                        sb.append("&#").append((int) c).append(';');
                    }
                    break;
            }
        }
        return sb.toString();
    }

    /**
     * Take a String which is a delimited list and convert it to a String array.
     * <p>A single delimiter can consists of more than one character: It will still
     * be considered as single delimiter string, rather than as bunch of potential
     * delimiter characters - in contrast to <code>tokenizeToStringArray</code>.</p>
     * @param str the input String
     * @param delimiter the delimiter between elements (this is a single delimiter,
     * rather than a bunch individual delimiter characters)
     * @return an array of the tokens in the list
     *
     * Borrowed from Spring, under the ASL2.0 license.
     */
    public static String[] delimitedListToStringArray(String str, String delimiter) {
        return delimitedListToStringArray(str, delimiter, null);
    }

    /**
     * Take a String which is a delimited list and convert it to a String array.
     * <p>A single delimiter can consists of more than one character: It will still
     * be considered as single delimiter string, rather than as bunch of potential
     * delimiter characters - in contrast to <code>tokenizeToStringArray</code>.</p>
     * <p>A null input gives an empty array, a null delimiter gives an array holding
     * the whole input, and an empty delimiter splits the input into single characters.</p>
     * @param str the input String
     * @param delimiter the delimiter between elements (this is a single delimiter,
     * rather than a bunch individual delimiter characters)
     * @param charsToDelete a set of characters to delete. Useful for deleting unwanted
     * line breaks: e.g. "\r\n\f" will delete all new lines and line feeds in a String.
     * @return an array of the tokens in the list
     *
     * Borrowed from Spring, under the ASL2.0 license.
     */
    public static String[] delimitedListToStringArray(String str, String delimiter, String charsToDelete) {
        if (str == null) {
            return new String[0];
        }
        if (delimiter == null) {
            return new String[] {str};
        }
        List<String> result = new ArrayList<String>();
        if ("".equals(delimiter)) {
            for (int i = 0; i < str.length(); i++) {
                result.add(deleteAny(str.substring(i, i + 1), charsToDelete));
            }
        }
        else {
            int pos = 0;
            int delPos;
            while ((delPos = str.indexOf(delimiter, pos)) != -1) {
                result.add(deleteAny(str.substring(pos, delPos), charsToDelete));
                pos = delPos + delimiter.length();
            }
            if (str.length() > 0 && pos <= str.length()) {
                // Add rest of String, but not in case of empty input.
                result.add(deleteAny(str.substring(pos), charsToDelete));
            }
        }
        return toStringArray(result);
    }

    /**
     * Copy the given Collection into a String array.
     * The Collection must contain String elements only.
     * @param collection the Collection to copy
     * @return the String array (<code>null</code> if the passed-in
     * Collection was <code>null</code>)
     *
     * Borrowed from Spring, under the ASL2.0 license.
     */
    public static String[] toStringArray(Collection<?> collection) {
        if (collection == null) {
            return null;
        }
        return collection.toArray(new String[collection.size()]);
    }

    /**
     * Delete any character in a given String.
     * <p>The input is returned unchanged when it, or the set of characters to
     * delete, is null or of length zero. Whitespace characters can be deleted
     * like any other character.</p>
     * @param inString the original String
     * @param charsToDelete a set of characters to delete.
     * E.g. "az\n" will delete 'a's, 'z's and new lines.
     * @return the resulting String
     *
     * Borrowed from Spring, under the ASL2.0 license.
     */
    public static String deleteAny(String inString, String charsToDelete) {
        // Length checks, not isEmpty(): a charsToDelete made only of whitespace
        // (e.g. line breaks) is a legitimate request.
        if (!hasLength(inString) || !hasLength(charsToDelete)) {
            return inString;
        }
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < inString.length(); i++) {
            char c = inString.charAt(i);
            if (charsToDelete.indexOf(c) == -1) {
                out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Reads the whole content of a Reader, wrapping it in a BufferedReader
     * when needed, and closes it.
     *
     * @param reader the Reader to drain
     * @return the content, see {@link #toString(BufferedReader)} for line handling
     * @throws IOException if reading fails
     */
    public static String toString(Reader reader) throws IOException {
        if ( reader instanceof BufferedReader ) {
            return toString( (BufferedReader) reader );
        } else {
            return toString( new BufferedReader( reader ) );
        }
    }

    /**
     * Reads the whole content of an InputStream decoded as UTF-8 and closes it.
     *
     * @param is the stream to drain
     * @return the content, see {@link #toString(BufferedReader)} for line handling
     * @throws IOException if reading fails
     */
    public static String toString(InputStream is) throws IOException {
        return toString( new BufferedReader(new InputStreamReader(is, "UTF-8") ) );
    }

    /**
     * Reads all the lines of a BufferedReader, joins them with "\n" and closes
     * the reader. Line terminators of the input are therefore normalized and no
     * terminator is appended after the last line.
     *
     * @param reader the reader to drain
     * @return the lines joined with "\n"
     * @throws IOException if reading fails
     */
    public static String toString(BufferedReader reader) throws IOException {
        StringBuilder sb = new StringBuilder();
        try {
            String line;
            boolean previousLine = false;
            while ((line = reader.readLine()) != null) {
                if ( previousLine ) {
                    sb.append("\n");
                }
                sb.append(line);
                previousLine = true;
            }
        } finally {
            reader.close();
        }
        return sb.toString();
    }

    /**
     * Generates a random UUID and returns its 32 characters without the dashes.
     *
     * @return a 32 character long identifier
     */
    public static String generateUUID() {
        char[] uuid = new char[32];
        char[] chars = UUID.randomUUID().toString().toCharArray();
        for (int i = 0, j = 0; i < 32; j++) {
            if (chars[j] != '-') {
                uuid[i++] = chars[j];
            }
        }
        return new String(uuid);
    }

    /**
     * Extracts the first Java identifier found in a String at or after the
     * given position.
     *
     * @param string the String to scan, must not be null
     * @param start the index to start scanning from
     * @return the identifier, "" if none is found
     */
    public static String extractFirstIdentifier(String string, int start) {
        StringBuilder builder = new StringBuilder();
        extractFirstIdentifier(string, builder, start);
        return builder.toString();
    }

    /**
     * Appends to the builder the first Java identifier found in a String at or
     * after the given position. Characters preceding the first identifier-start
     * character are skipped.
     *
     * @param string the String to scan, must not be null
     * @param builder the builder receiving the identifier characters
     * @param start the index to start scanning from
     * @return the index of the character that ended the identifier, or the
     *         length of the String when the scan reached its end
     */
    public static int extractFirstIdentifier(String string, StringBuilder builder, int start) {
        boolean started = false;
        int i = start;
        for (; i < string.length(); i++) {
            char ch = string.charAt(i);
            if (Character.isJavaIdentifierStart(ch)) {
                builder.append(ch);
                started = true;
            }
            else if (started && Character.isJavaIdentifierPart(ch)) {
                builder.append(ch);
            } else if (started) {
                break;
            }
        }
        return i;
    }

    /**
     * Skips spaces and tabs.
     *
     * @param string the String to scan, must not be null
     * @param start the index to start from
     * @return the index of the first character at or after <code>start</code>
     *         that is neither a space nor a tab, or the length of the String
     */
    public static int skipBlanks(String string, int start) {
        int i = start;
        while (i < string.length() && (string.charAt(i) == ' ' || string.charAt(i) == '\t')) {
            i++;
        }
        return i;
    }

    /**
     * Splits code on the ';' characters that are neither quoted nor nested in
     * parentheses, brackets or braces.
     *
     * @param string the code to split, must not be null
     * @return the trimmed statements
     */
    public static List<String> splitStatements(CharSequence string) {
        return codeAwareSplitOnChar(string, ';');
    }

    /**
     * Splits code on the ',' characters that are neither quoted nor nested in
     * parentheses, brackets or braces.
     *
     * @param string the code to split, must not be null
     * @return the trimmed arguments
     */
    public static List<String> splitArgumentsList(CharSequence string) {
        return codeAwareSplitOnChar(string, ',');
    }

    /**
     * Splits on the occurrences of <code>ch</code> found outside of string or
     * character literals and outside of any (), [] or {} nesting. Each part is
     * trimmed; an empty last part is not reported.
     *
     * @param string the code to split
     * @param ch the separator character
     * @return the trimmed parts
     */
    private static List<String> codeAwareSplitOnChar(CharSequence string, char ch) {
        List<String> args = new ArrayList<String>();
        int lastStart = 0;
        int nestedParam = 0;
        // Tracked separately so that a ' inside "..." (or a " inside '...')
        // does not end the literal.
        boolean isDoubleQuoted = false;
        boolean isSingleQuoted = false;
        final int length = string.length();
        for (int i = 0; i < length; i++) {
            final char current = string.charAt( i );
            final boolean isQuoted = isDoubleQuoted || isSingleQuoted;
            if (current == ch) {
                if (!isQuoted && nestedParam == 0) {
                    args.add(string.subSequence(lastStart, i).toString().trim());
                    lastStart = i + 1;
                }
                continue;
            }
            switch (current) {
                case '(':
                case '[':
                case '{':
                    if (!isQuoted) nestedParam++;
                    break;
                case ')':
                case ']':
                case '}':
                    if (!isQuoted) nestedParam--;
                    break;
                case '"':
                    if (!isSingleQuoted) {
                        isDoubleQuoted = !isDoubleQuoted;
                    }
                    break;
                case '\'':
                    if (!isDoubleQuoted) {
                        isSingleQuoted = !isSingleQuoted;
                    }
                    break;
                case '\\':
                    if (isQuoted) {
                        // inside a literal a backslash escapes the next character,
                        // whatever it is (including another backslash)
                        i++;
                    } else if (i + 1 < length
                               && (string.charAt(i + 1) == '"' || string.charAt(i + 1) == '\'')) {
                        // an escaped quote outside of a literal does not open one
                        i++;
                    }
                    break;
                default:
                    // nothing to do, just continue with next character
            }
        }
        String lastArg = string.subSequence(lastStart, length).toString().trim();
        if (lastArg.length() > 0) {
            args.add(lastArg);
        }
        return args;
    }

    /**
     * Compares two string being equals ignoring whitespaces, but preserving whitespace between double-quotes
     * The two inputs MUST BE valid DRL/Java syntax (this validation is NOT performed by this method, this method assumes they are).
     * Null check: if either of the input is null, this method will return true IFF both are null.
     * Empty check: if either of the input is an empty string, it will be considered as a whitespace during code-aware comparison.
     *
     * @param in1 the first String, may be null
     * @param in2 the second String, may be null
     * @return true if both inputs are equal once the unquoted whitespace is ignored
     */
    public static boolean codeAwareEqualsIgnoreSpaces(String in1, String in2) {
        if ( in1 == null || in2 == null ) {
            return in1 == null && in2 == null;
        }
        if ( in1.isEmpty() && in2.isEmpty() ) {
            return true;
        }
        if ( in1.length() == 0 ) {
            in1 = " ";
        }
        if ( in2.length() == 0 ) {
            in2 = " ";
        }

        // quotedN holds the quote character that opened the literal currently
        // being read in inN, or null when outside of a literal
        int idx1 = 0; Character quoted1 = null;
        int idx2 = 0; Character quoted2 = null;

        boolean equals = true;
        while ( equals ) {
            // whitespace is only skipped outside of literals
            while ( idx1 < (in1.length()) && (quoted1==null) && isWhitespace(in1.charAt(idx1)) ) {
                idx1++;
            }
            while ( idx2 < (in2.length()) && (quoted2==null) && isWhitespace(in2.charAt(idx2)) ) {
                idx2++;
            }
            if ( idx1 >= in1.length() || idx2 >= in2.length() ) {
                // considered equals if equals check succeeded and both indexes reached end of respective string.
                equals = equals && idx1 == in1.length() && idx2 == in2.length();
                break;
            }
            if ( in1.charAt(idx1) == '"' || in1.charAt(idx1) == '\'' ) {
                if ( quoted1 == null ) {
                    quoted1 = in1.charAt(idx1);
                } else if ( quoted1 != null && quoted1.equals( in1.charAt(idx1) ) ) {
                    if ( in1.charAt(idx1-1) != '\\' ) {
                        quoted1 = null;
                    }
                }
            }
            if ( in2.charAt(idx2) == '"' || in2.charAt(idx2) == '\'' ) {
                if ( quoted2 == null ) {
                    quoted2 = in2.charAt(idx2);
                } else if ( quoted2 != null && quoted2.equals( in2.charAt(idx2) ) ) {
                    if ( in2.charAt(idx2-1) != '\\' ) {
                        quoted2 = null;
                    }
                }
            }
            equals &= in1.charAt(idx1) == in2.charAt(idx2);
            idx1++;
            idx2++;
        }
        return equals;
    }

    /**
     * Finds the closing parenthesis of a method arguments list, ignoring the
     * parentheses found inside single or double quoted literals.
     *
     * @param string the code to scan, must not be null
     * @param startOfMethodArgsIndex the index to start scanning from; presumably
     *        the index of the opening parenthesis of the arguments list
     * @return the index of the ')' that brings the nesting level back to zero,
     *         or -1 if there is none
     */
    public static int findEndOfMethodArgsIndex(CharSequence string, int startOfMethodArgsIndex) {
        boolean isDoubleQuoted = false;
        boolean isSingleQuoted = false;
        int nestingLevel = 0;
        for (int charIndex = startOfMethodArgsIndex; charIndex < string.length(); charIndex++) {
            boolean isCurrentCharEscaped = charIndex > 0 && string.charAt(charIndex - 1) == '\\';
            switch (string.charAt(charIndex)) {
                case '(':
                    if (!isDoubleQuoted && !isSingleQuoted) {
                        nestingLevel++;
                    }
                    break;
                case ')':
                    if (!isDoubleQuoted && !isSingleQuoted) {
                        nestingLevel--;
                        if (nestingLevel == 0) {
                            return charIndex;
                        }
                    }
                    break;
                case '"':
                    if (isCurrentCharEscaped || isSingleQuoted) {
                        // ignore escaped double quote and double quote inside single quotes (e.g 'text " text')
                        continue;
                    }
                    isDoubleQuoted = !isDoubleQuoted;
                    break;
                case '\'':
                    if (isCurrentCharEscaped || isDoubleQuoted) {
                        // ignore escaped single quote and single quote inside double quotes (e.g. "text ' text")
                        continue;
                    }
                    isSingleQuoted = !isSingleQuoted;
                    break;
                default:
                    // nothing to do, just continue with next character
            }
        }
        return -1;
    }

    /**
     * Finds the first occurrence of a String that is preceded by an even
     * number of unescaped double quotes, i.e. that does not start inside a
     * double quoted literal.
     *
     * @param str the String to scan, must not be null
     * @param searched the String to look for, must not be null
     * @return the index of the first unquoted occurrence, -1 if there is none
     */
    public static int indexOfOutOfQuotes(String str, String searched) {
        // Quotes are counted incrementally between successive candidates rather
        // than from the beginning of the String for each of them.
        int quotes = 0;
        int counted = 0;
        for ( int i = str.indexOf(searched); i >= 0; i = str.indexOf(searched, i+1) ) {
            quotes += countQuoteOccurrences( str, counted, i );
            counted = i;
            if ( quotes % 2 == 0 ) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Counts the double quotes not preceded by a backslash between
     * <code>start</code> (inclusive) and <code>end</code> (exclusive).
     */
    private static int countQuoteOccurrences(String str, int start, int end) {
        int count = 0;
        for (int i = start; i < end; i++) {
            if (str.charAt(i) == '"' && (i == 0 || str.charAt(i-1) != '\\')) {
                count++;
            }
        }
        return count;
    }

    /**
     * Finds the first occurrence of a character located outside of any double
     * quoted literal. A double quote preceded by a backslash does not open or
     * close a literal.
     *
     * @param str the String to scan, must not be null
     * @param searched the character to look for
     * @return the index of the first unquoted occurrence, -1 if there is none
     */
    public static int indexOfOutOfQuotes(String str, char searched) {
        boolean isQuoted = false;
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt( i );
            if (ch == '"' && (i == 0 || str.charAt(i-1) != '\\')) {
                isQuoted = !isQuoted;
            } else if (ch == searched && !isQuoted) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Checks whether an expression is a simple identifier: a letter, '_' or
     * '$' followed by letters, digits, '_' or '$', and not one of the literals
     * <code>true</code>, <code>false</code> or <code>null</code>.
     *
     * @param expr the expression to check, must not be null
     * @return true if the expression is an identifier
     */
    public static boolean isIdentifier(String expr) {
        return !expr.equals("true") && !expr.equals("false") &&
               !expr.equals("null") && expr.matches("[\\p{L}_\\$][\\p{L}_\\$\\p{N}]*");
    }

    /**
     * Same check as {@link #isIdentifier(String)} but also accepting '.'
     * characters after the first character, as in <code>a.b.c</code>.
     *
     * @param expr the expression to check, must not be null
     * @return true if the expression is a, possibly dereferencing, identifier
     */
    public static boolean isDereferencingIdentifier(String expr) {
        return !expr.equals("true") && !expr.equals("false") &&
               !expr.equals("null") && expr.matches("[\\p{L}_\\$][\\p{L}_\\$\\p{N}\\.]*");
    }

    // To be extended in the future with more comparison strategies
    public enum SIMILARITY_STRATS { DICE }

    /**
     * Computes a similarity score between two Strings with the given strategy.
     *
     * @param s1 the first String, must not be null
     * @param s2 the second String, must not be null
     * @param method the strategy to use; every value currently maps to DICE
     * @return the similarity score, see {@code stringSimilarityDice}
     */
    public static double stringSimilarity( String s1, String s2, SIMILARITY_STRATS method ) {
        switch ( method ) {
            case DICE:
            default: return stringSimilarityDice( s1, s2 );
        }
    }

    /**
     * Dice-style similarity on character bigrams: twice the number of bigrams
     * of the shorter String that occur in the longer one, divided by the total
     * number of bigrams of both Strings.
     *
     * <p>A String of fewer than two characters has no bigram; in that case the
     * score is 1.0 when both Strings are equal and 0.0 otherwise.</p>
     */
    private static double stringSimilarityDice( String s1, String s2 ) {
        if ( s1.length() < 2 || s2.length() < 2 ) {
            // avoids a zero or negative denominator
            return s1.equals( s2 ) ? 1.0 : 0.0;
        }
        int n1 = s1.length() - 1;
        int n2 = s2.length() - 1;
        int n;
        if ( s1.length() < s2.length() ) {
            n = commonBigrams( s1, s2 );
        } else {
            n = commonBigrams( s2, s1 );
        }
        return (2.0 * n) / ( n1 + n2 );
    }

    /**
     * Counts the bigrams (pairs of adjacent characters) of <code>s1</code>
     * that occur anywhere in <code>s2</code>.
     */
    private static int commonBigrams( String s1, String s2 ) {
        int acc = 0;
        for ( int j = 0; j < s1.length() - 1; j++ ) {
            String bigram = s1.substring( j, j + 2 );
            acc += s2.indexOf( bigram ) >= 0 ? 1 : 0;
        }
        return acc;
    }

    /**
     * Compares two Strings after removing every whitespace sequence matched
     * by the regular expression <code>\s+</code> from both of them.
     *
     * @param s1 the first String, must not be null
     * @param s2 the second String, must not be null
     * @return true if both Strings are equal once whitespace is removed
     */
    public static boolean equalsIgnoreSpaces(String s1, String s2) {
        return s1.replaceAll( "\\s+", "" ).equals( s2.replaceAll( "\\s+", "" ) );
    }

    /**
     * Generates a random UUID rendered with an "x" prefix and with its dashes
     * replaced by 'x'.
     *
     * @return the generated identifier
     */
    public static String uuid() {
        return "x" + UUID.randomUUID().toString().replace( '-', 'x' );
    }
}