// net.snowflake.common.util.StringUtil
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package net.snowflake.common.util;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * String helpers for decoding escape sequences and octal/hexadecimal character codes, plus a few
 * simple character-class checks.
 *
 * @author jhuang
 */
public class StringUtil {
  /**
   * Matches a string consisting solely of a backslash followed by an octal value in the range
   * 0-377, optionally preceded by leading zeros (for example {@code \101} or {@code \0007}).
   */
  public static Pattern octalCode =
      Pattern.compile("^\\\\0*(([0-7])|([0-7][0-7])|([0-3][0-7][0-7]))$");

  /** Precompiled so {@link #isAlphaNumericUnderscore(String)} does not recompile it per call. */
  private static final Pattern ALPHA_NUMERIC_UNDERSCORE = Pattern.compile("[A-Za-z0-9_]+");

  /**
   * Maps a two-character escape sequence (backslash plus one character) to the one-character
   * string it denotes.
   *
   * <p>TODO: currently we only do escape sequence replacement for special strings that are used in
   * table stage area file format. We don't do general replacement for escape sequence in any
   * string, which is something we should do for any string literal later.
   */
  static Map<String, String> escapeSequenceToCharMap = new HashMap<String, String>();

  static {
    escapeSequenceToCharMap.put("\\\"", String.valueOf('"')); // double quote
    escapeSequenceToCharMap.put("\\0", String.valueOf('\0')); // nul
    escapeSequenceToCharMap.put("\\f", String.valueOf('\f')); // form feed
    escapeSequenceToCharMap.put("\\t", String.valueOf('\t')); // tab
    escapeSequenceToCharMap.put("\\n", String.valueOf('\n')); // newline
    escapeSequenceToCharMap.put("\\r", String.valueOf('\r')); // return
    escapeSequenceToCharMap.put("\\\\", String.valueOf('\\')); // backslash
  }

  /**
   * Checks whether a string is a backslash-prefixed octal value and, if so, converts it to the
   * character that value represents. Any other input, including {@code null}, is returned
   * unchanged.
   *
   * <p>TODO: change this to handle octal value in any string, instead of octal value only string.
   *
   * @param value a string that may consist of an octal escape such as {@code \101}
   * @return a one-character string for a matching octal escape, otherwise {@code value} itself
   */
  public static String checkAndConvertOctalToChar(String value) {
    if (value == null || !octalCode.matcher(value).matches()) {
      return value;
    }
    try {
      return convertOctalToChar(value);
    } catch (NumberFormatException ex) {
      // Best effort: fall back to the untouched input if the digits cannot be parsed.
      return value;
    }
  }

  /**
   * Converts an octal value, optionally prefixed with a single backslash, to a character.
   *
   * @param value octal digits, optionally preceded by a backslash
   * @return a one-character string holding the character with that code
   * @throws NumberFormatException if the remaining text is not a valid base-8 integer
   */
  public static String convertOctalToChar(String value) {
    String digits = value.startsWith("\\") ? value.substring(1) : value;
    // Parse as base 8, then narrow the resulting code to a char.
    int code = Integer.parseInt(digits, 8);
    return String.valueOf((char) code);
  }

  /**
   * Converts a hexadecimal value, optionally prefixed with {@code 0x} or {@code 0X}, to a
   * character.
   *
   * @param value hexadecimal digits, optionally preceded by {@code 0x}
   * @return a one-character string holding the character with that code
   * @throws NumberFormatException if the remaining text is not a valid base-16 integer
   */
  public static String convertHexToChar(String value) {
    // Case-insensitive prefix test without allocating a lower-cased copy of the input.
    String digits = value.regionMatches(true, 0, "0x", 0, 2) ? value.substring(2) : value;
    // Parse as base 16, then narrow the resulting code to a char.
    int code = Integer.parseInt(digits, 16);
    return String.valueOf((char) code);
  }

  /**
   * Looks up the character denoted by an escape sequence.
   *
   * @param value an escape sequence such as {@code \n} written as backslash plus letter
   * @return the corresponding one-character string, or {@code null} if the sequence is not one of
   *     the registered escapes
   */
  public static String convertEscapedSequenceToChar(String value) {
    return escapeSequenceToCharMap.get(value);
  }

  /**
   * Wraps a string in double quotes if the string itself contains a double quote. Embedded quotes
   * are not escaped.
   *
   * @param str a string
   * @return the string surrounded by double quotes if it contains one, otherwise the original
   *     string
   */
  public static String quote(String str) {
    // Double quote identifiers only if they contain quotes
    if (str.contains("\"")) {
      return '"' + str + '"';
    }
    return str;
  }

  /**
   * Tells whether a string consists only of 7-bit ASCII characters.
   *
   * @param s a string
   * @return true if every character can be encoded as US-ASCII, otherwise false
   */
  public static boolean isPureAscii(String s) {
    // A fresh encoder per call: CharsetEncoder instances carry state and are not shareable.
    CharsetEncoder asciiEncoder = Charset.forName("US-ASCII").newEncoder();
    return asciiEncoder.canEncode(s);
  }

  /**
   * Checks whether the input is a non-empty string made up only of ASCII letters, digits and
   * underscores.
   *
   * @param sequence the string to check
   * @return true if the string is non-empty and contains only letters, digits and underscores,
   *     otherwise false
   */
  public static boolean isAlphaNumericUnderscore(String sequence) {
    return ALPHA_NUMERIC_UNDERSCORE.matcher(sequence).matches();
  }
}