org.hsqldb.lib.StringConverter Maven / Gradle / Ivy
/*
* For work developed by the HSQL Development Group:
*
* Copyright (c) 2001-2011, The HSQL Development Group
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the HSQL Development Group nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
*
* For work originally developed by the Hypersonic SQL Group:
*
* Copyright (c) 1995-2000, The Hypersonic SQL Group.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the Hypersonic SQL Group nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE HYPERSONIC SQL GROUP,
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This software consists of voluntary contributions made by many individuals
* on behalf of the Hypersonic SQL Group.
*/
package org.hsqldb.lib;
import java.io.IOException;
import java.io.InputStream;
import java.io.UTFDataFormatException;
import org.hsqldb.map.BitMap;
/**
* Collection of static methods for converting strings between different
* formats and to and from byte arrays.
*
* Includes some methods based on Hypersonic code as indicated.
*
* @author Thomas Mueller (Hypersonic SQL Group)
* @author Fred Toussi (fredt@users dot sourceforge.net)
* @version 2.0.1
* @since 1.7.2
*/
public class StringConverter {
private static final byte[] HEXBYTES = {
(byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
(byte) '6', (byte) '7', (byte) '8', (byte) '9', (byte) 'a', (byte) 'b',
(byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f'
};
private static int getNibble(int value) {
if (value >= '0' && value <= '9') {
return value - '0';
}
if (value >= 'a' && value <= 'f') {
return 10 + value - 'a';
}
if (value >= 'A' && value <= 'F') {
return 10 + value - 'A';
}
return -1;
}
/**
* Converts a hexadecimal string into a byte array
*
*
* @param s hexadecimal string
*
* @return byte array for the hex string
* @throws IOException
*/
public static byte[] hexStringToByteArray(String s) throws IOException {
int l = s.length();
byte[] data = new byte[l / 2 + (l % 2)];
int n,
b = 0;
boolean high = true;
int i = 0;
for (int j = 0; j < l; j++) {
char c = s.charAt(j);
if (c == ' ') {
continue;
}
n = getNibble(c);
if (n == -1) {
throw new IOException(
"hexadecimal string contains non hex character"); //NOI18N
}
if (high) {
b = (n & 0xf) << 4;
high = false;
} else {
b += (n & 0xf);
high = true;
data[i++] = (byte) b;
}
}
if (!high) {
throw new IOException(
"hexadecimal string with odd number of characters"); //NOI18N
}
if (i < data.length) {
data = (byte[]) ArrayUtil.resizeArray(data, i);
}
return data;
}
/**
* Compacts a bit string into a BitMap
*
*
* @param s bit string
*
* @return byte array for the hex string
* @throws IOException
*/
public static BitMap sqlBitStringToBitMap(String s) throws IOException {
int l = s.length();
int n;
int bitIndex = 0;
BitMap map = new BitMap(l, true);
for (int j = 0; j < l; j++) {
char c = s.charAt(j);
if (c == ' ') {
continue;
}
n = getNibble(c);
if (n != 0 && n != 1) {
throw new IOException(
"hexadecimal string contains non hex character"); //NOI18N
}
if (n == 1) {
map.set(bitIndex);
}
bitIndex++;
}
map.setSize(bitIndex);
return map;
}
/**
* Converts a byte array into a hexadecimal string
*
*
* @param b byte array
*
* @return hex string
*/
public static String byteArrayToHexString(byte[] b) {
int len = b.length;
char[] s = new char[len * 2];
for (int i = 0, j = 0; i < len; i++) {
int c = ((int) b[i]) & 0xff;
s[j++] = (char) HEXBYTES[c >> 4 & 0xf];
s[j++] = (char) HEXBYTES[c & 0xf];
}
return new String(s);
}
/**
* Converts a byte array into an SQL hexadecimal string
*
*
* @param b byte array
*
* @return hex string
*/
public static String byteArrayToSQLHexString(byte[] b) {
int len = b.length;
char[] s = new char[len * 2 + 3];
s[0] = 'X';
s[1] = '\'';
int j = 2;
for (int i = 0; i < len; i++) {
int c = ((int) b[i]) & 0xff;
s[j++] = (char) HEXBYTES[c >> 4 & 0xf];
s[j++] = (char) HEXBYTES[c & 0xf];
}
s[j] = '\'';
return new String(s);
}
/**
* Converts a byte array into a bit string
*
*
* @param bytes byte array
* @param bitCount number of bits
* @return hex string
*/
public static String byteArrayToBitString(byte[] bytes, int bitCount) {
char[] s = new char[bitCount];
for (int j = 0; j < bitCount; j++) {
byte b = bytes[j / 8];
s[j] = BitMap.isSet(b, j % 8) ? '1'
: '0';
}
return new String(s);
}
/**
* Converts a byte array into an SQL binary string
*
*
* @param bytes byte array
* @param bitCount number of bits
* @return hex string
*/
public static String byteArrayToSQLBitString(byte[] bytes, int bitCount) {
char[] s = new char[bitCount + 3];
s[0] = 'B';
s[1] = '\'';
int pos = 2;
for (int j = 0; j < bitCount; j++) {
byte b = bytes[j / 8];
s[pos++] = BitMap.isSet(b, j % 8) ? '1'
: '0';
}
s[pos] = '\'';
return new String(s);
}
/**
* Converts a byte array into hexadecimal characters which are written as
* ASCII to the given output stream.
*
* @param o output array
* @param from offset into output array
* @param b input array
*/
public static void writeHexBytes(byte[] o, int from, byte[] b) {
int len = b.length;
for (int i = 0; i < len; i++) {
int c = ((int) b[i]) & 0xff;
o[from++] = HEXBYTES[c >> 4 & 0xf];
o[from++] = HEXBYTES[c & 0xf];
}
}
public static String byteArrayToString(byte[] b, String charset) {
try {
return (charset == null) ? new String(b)
: new String(b, charset);
} catch (Exception e) {}
return null;
}
/**
* Hsqldb specific encoding used only for log files. The SQL statements that
* need to be written to the log file (input) are Java Unicode strings.
* input is converted into a 7bit escaped ASCII string (output)with the
* following transformations. All characters outside the 0x20-7f range are
* converted to a escape sequence and added to output. If a backslash
* character is immdediately followed by 'u', the backslash character is
* converted to escape sequence and added to output. All the remaining
* characters in input are added to output without conversion. The escape
* sequence is backslash, letter u, xxxx, where xxxx is the hex
* representation of the character code. (fredt@users)
*
* Method based on Hypersonic Code
*
* @param b output stream to wite to
* @param s Java string
* @param doubleSingleQuotes boolean
*/
public static void stringToUnicodeBytes(HsqlByteArrayOutputStream b,
String s, boolean doubleSingleQuotes) {
if (s == null) {
return;
}
final int len = s.length();
int extras = 0;
if (len == 0) {
return;
}
b.ensureRoom(len * 2 + 5);
for (int i = 0; i < len; i++) {
char c = s.charAt(i);
if (c == '\\') {
if ((i < len - 1) && (s.charAt(i + 1) == 'u')) {
b.writeNoCheck(c); // encode the \ as unicode, so 'u' is ignored
b.writeNoCheck('u');
b.writeNoCheck('0');
b.writeNoCheck('0');
b.writeNoCheck('5');
b.writeNoCheck('c');
extras += 5;
} else {
b.write(c);
}
} else if ((c >= 0x0020) && (c <= 0x007f)) {
b.writeNoCheck(c); // this is 99%
if (c == '\'' && doubleSingleQuotes) {
b.writeNoCheck(c);
extras++;
}
} else {
b.writeNoCheck('\\');
b.writeNoCheck('u');
b.writeNoCheck(HEXBYTES[(c >> 12) & 0xf]);
b.writeNoCheck(HEXBYTES[(c >> 8) & 0xf]);
b.writeNoCheck(HEXBYTES[(c >> 4) & 0xf]);
b.writeNoCheck(HEXBYTES[c & 0xf]);
extras += 5;
}
if (extras > len) {
b.ensureRoom(len + extras + 5);
extras = 0;
}
}
}
// fredt@users 20020522 - fix for 557510 - backslash bug
// this legacy bug resulted from forward reading the input when a backslash
// was present and manifested itself when a backslash was followed
// immdediately by a character outside the 0x20-7f range in a database field.
/**
* Hsqldb specific decoding used only for log files. This method converts
* the 7 bit escaped ASCII strings in a log file back into Java Unicode
* strings. See stringToUnicodeBytes() above.
*
* Method based on Hypersonic Code
*
* @param s encoded ASCII string in byte array
* @return Java string
*/
public static String unicodeStringToString(String s) {
if ((s == null) || (s.indexOf("\\u") == -1)) {
return s;
}
int len = s.length();
char[] b = new char[len];
int j = 0;
for (int i = 0; i < len; i++) {
char c = s.charAt(i);
if (c == '\\' && i < len - 5) {
char c1 = s.charAt(i + 1);
if (c1 == 'u') {
i++;
// 4 characters read should always return 0-15
int k = getNibble(s.charAt(++i)) << 12;
k += getNibble(s.charAt(++i)) << 8;
k += getNibble(s.charAt(++i)) << 4;
k += getNibble(s.charAt(++i));
b[j++] = (char) k;
} else {
b[j++] = c;
}
} else {
b[j++] = c;
}
}
return new String(b, 0, j);
}
public static String readUTF(byte[] bytearr, int offset,
int length) throws IOException {
char[] buf = new char[length];
return readUTF(bytearr, offset, length, buf);
}
public static String readUTF(byte[] bytearr, int offset, int length,
char[] buf) throws IOException {
int bcount = 0;
int c, char2, char3;
int count = 0;
while (count < length) {
c = (int) bytearr[offset + count];
if (bcount == buf.length) {
buf = (char[]) ArrayUtil.resizeArray(buf, length);
}
if (c > 0) {
/* 0xxxxxxx*/
count++;
buf[bcount++] = (char) c;
continue;
}
c &= 0xff;
switch (c >> 4) {
case 12 :
case 13 :
/* 110x xxxx 10xx xxxx*/
count += 2;
if (count > length) {
throw new UTFDataFormatException();
}
char2 = (int) bytearr[offset + count - 1];
if ((char2 & 0xC0) != 0x80) {
throw new UTFDataFormatException();
}
buf[bcount++] = (char) (((c & 0x1F) << 6)
| (char2 & 0x3F));
break;
case 14 :
/* 1110 xxxx 10xx xxxx 10xx xxxx */
count += 3;
if (count > length) {
throw new UTFDataFormatException();
}
char2 = (int) bytearr[offset + count - 2];
char3 = (int) bytearr[offset + count - 1];
if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
throw new UTFDataFormatException();
}
buf[bcount++] = (char) (((c & 0x0F) << 12)
| ((char2 & 0x3F) << 6)
| ((char3 & 0x3F) << 0));
break;
default :
/* 10xx xxxx, 1111 xxxx */
throw new UTFDataFormatException();
}
}
// The number of chars produced may be less than length
return new String(buf, 0, bcount);
}
/**
* Writes a string to the specified DataOutput using UTF-8 encoding in a
* machine-independent manner.
*
* @param str a string to be written.
* @param out destination to write to
* @return The number of bytes written out.
*/
public static int stringToUTFBytes(String str,
HsqlByteArrayOutputStream out) {
int strlen = str.length();
int c,
count = 0;
if (out.count + strlen + 8 > out.buffer.length) {
out.ensureRoom(strlen + 8);
}
char[] arr = str.toCharArray();
for (int i = 0; i < strlen; i++) {
c = arr[i];
if (c >= 0x0001 && c <= 0x007F) {
out.buffer[out.count++] = (byte) c;
count++;
} else if (c > 0x07FF) {
out.buffer[out.count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
out.buffer[out.count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
out.buffer[out.count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
count += 3;
} else {
out.buffer[out.count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
out.buffer[out.count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
count += 2;
}
if (out.count + 8 > out.buffer.length) {
out.ensureRoom(strlen - i + 8);
}
}
return count;
}
public static int getUTFSize(String s) {
int len = (s == null) ? 0
: s.length();
int l = 0;
for (int i = 0; i < len; i++) {
int c = s.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
l++;
} else if (c > 0x07FF) {
l += 3;
} else {
l += 2;
}
}
return l;
}
/**
* Using an output stream, returns a String from an InputStream.
*
* @param is InputStream to read from
* @param encoding character encoding of the string
* @throws IOException
* @return a Java string
*/
public static String inputStreamToString(InputStream is,
String encoding) throws IOException {
HsqlByteArrayOutputStream baOS = new HsqlByteArrayOutputStream(1024);
while (true) {
int c = is.read();
if (c == -1) {
break;
}
baOS.write(c);
}
return new String(baOS.getBuffer(), 0, baOS.size(), encoding);
}
// fredt@users 20020130 - patch 497872 by Nitin Chauhan - use byte[] of exact size
/**
* Returns the quoted version of the string using the quotechar argument.
* doublequote argument indicates whether each instance of quotechar inside
* the string is doubled.
*
* null string argument returns null. If the caller needs the literal
* "NULL" it should created it itself
*
* @param s Java string
* @param quoteChar character used for quoting
* @param extraQuote true if quoteChar itself should be repeated
* @return String
*/
public static String toQuotedString(String s, char quoteChar,
boolean extraQuote) {
if (s == null) {
return null;
}
int count = extraQuote ? count(s, quoteChar)
: 0;
int len = s.length();
char[] b = new char[2 + count + len];
int i = 0;
int j = 0;
b[j++] = quoteChar;
for (; i < len; i++) {
char c = s.charAt(i);
b[j++] = c;
if (extraQuote && c == quoteChar) {
b[j++] = c;
}
}
b[j] = quoteChar;
return new String(b);
}
/**
* Counts Character c in String s
*
* @param s Java string
* @param c character to count
* @return int count
*/
static int count(final String s, final char c) {
int pos = 0;
int count = 0;
if (s != null) {
while ((pos = s.indexOf(c, pos)) > -1) {
count++;
pos++;
}
}
return count;
}
/**
* Converts the string to an HTML representation in the ASCII character set.
*
* The string is treated as an SQL statement.
*
* @param b the byte array
* @return UUID string form
*/
public static void stringToHtmlBytes(HsqlByteArrayOutputStream b,
String s) {
if (s == null) {
return;
}
final int len = s.length();
char[] chars;
if (len == 0) {
return;
}
chars = s.toCharArray();
b.ensureRoom(len);
for (int i = 0; i < len; i++) {
char c = chars[i];
if (c > 0x007f || c == '"' || c == '&' || c == '<' || c == '>') {
int codePoint = Character.codePointAt(chars, i);
if (Character.charCount(codePoint) == 2) {
i++;
}
b.ensureRoom(16);
b.writeNoCheck('&');
b.writeNoCheck('#');
b.writeBytes(String.valueOf(codePoint));
b.writeNoCheck(';');
} else if (c < 0x0020 ) {
b.writeNoCheck(' ');
} else {
b.writeNoCheck(c);
}
}
}
/**
* Returns a string representation in UUID form from a binary string.
*
* UUID string is composed of 8-4-4-4-12 hexadecimal characters.
*
* @param b the byte array
* @return UUID string form
*/
public static String toStringUUID(byte[] b) {
char[] chars = new char[36];
int hexIndex;
if (b == null) {
return null;
}
if (b.length != 16) {
throw new NumberFormatException();
}
for (int i = 0, j = 0; i < b.length; ) {
hexIndex = (b[i] & 0xf0) >> 4;
chars[j++] = (char) HEXBYTES[hexIndex];
hexIndex = b[i] & 0xf;
chars[j++] = (char) HEXBYTES[hexIndex];
i++;
if (i >= 4 && i <= 10 && (i % 2) == 0) {
chars[j++] = '-';
}
}
return new String(chars);
}
/**
* Returns a byte[] representation in UUID form from a UUID string.
*
* @param s the UUID string
* @return byte array
*/
public static byte[] toBinaryUUID(String s) {
byte[] bytes = new byte[16];
if (s == null) {
return null;
}
if (s.length() != 36) {
throw new NumberFormatException();
}
for (int i = 0, j = 0; i < bytes.length; ) {
char c = s.charAt(j++);
int high = getNibble(c);
c = s.charAt(j++);
bytes[i] = (byte) ((high << 4) + getNibble(c));
i++;
if (i >= 4 && i <= 10 && (i % 2) == 0) {
c = s.charAt(j++);
if (c != '-') {}
}
}
return bytes;
}
}