All downloads are free. The search and download functionality uses the official Maven repository.

org.hsqldb.lib.StringConverter Maven / Gradle / Ivy

The newest version!
/*
 * For work developed by the HSQL Development Group:
 *
 * Copyright (c) 2001-2011, The HSQL Development Group
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the HSQL Development Group nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 *
 * For work originally developed by the Hypersonic SQL Group:
 *
 * Copyright (c) 1995-2000, The Hypersonic SQL Group.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the Hypersonic SQL Group nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE HYPERSONIC SQL GROUP,
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This software consists of voluntary contributions made by many individuals
 * on behalf of the Hypersonic SQL Group.
 */


package org.hsqldb.lib;

import java.io.IOException;
import java.io.InputStream;
import java.io.UTFDataFormatException;

import org.hsqldb.map.BitMap;

/**
 * Collection of static methods for converting strings between different
 * formats and to and from byte arrays.

* * Includes some methods based on Hypersonic code as indicated. * * @author Thomas Mueller (Hypersonic SQL Group) * @author Fred Toussi (fredt@users dot sourceforge.net) * @version 2.0.1 * @since 1.7.2 */ public class StringConverter { private static final byte[] HEXBYTES = { (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5', (byte) '6', (byte) '7', (byte) '8', (byte) '9', (byte) 'a', (byte) 'b', (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f' }; private static int getNibble(int value) { if (value >= '0' && value <= '9') { return value - '0'; } if (value >= 'a' && value <= 'f') { return 10 + value - 'a'; } if (value >= 'A' && value <= 'F') { return 10 + value - 'A'; } return -1; } /** * Converts a hexadecimal string into a byte array * * * @param s hexadecimal string * * @return byte array for the hex string * @throws IOException */ public static byte[] hexStringToByteArray(String s) throws IOException { int l = s.length(); byte[] data = new byte[l / 2 + (l % 2)]; int n, b = 0; boolean high = true; int i = 0; for (int j = 0; j < l; j++) { char c = s.charAt(j); if (c == ' ') { continue; } n = getNibble(c); if (n == -1) { throw new IOException( "hexadecimal string contains non hex character"); //NOI18N } if (high) { b = (n & 0xf) << 4; high = false; } else { b += (n & 0xf); high = true; data[i++] = (byte) b; } } if (!high) { throw new IOException( "hexadecimal string with odd number of characters"); //NOI18N } if (i < data.length) { data = (byte[]) ArrayUtil.resizeArray(data, i); } return data; } /** * Compacts a bit string into a BitMap * * * @param s bit string * * @return byte array for the hex string * @throws IOException */ public static BitMap sqlBitStringToBitMap(String s) throws IOException { int l = s.length(); int n; int bitIndex = 0; BitMap map = new BitMap(l, true); for (int j = 0; j < l; j++) { char c = s.charAt(j); if (c == ' ') { continue; } n = getNibble(c); if (n != 0 && n != 1) { throw new IOException( "hexadecimal string 
contains non hex character"); //NOI18N } if (n == 1) { map.set(bitIndex); } bitIndex++; } map.setSize(bitIndex); return map; } /** * Converts a byte array into a hexadecimal string * * * @param b byte array * * @return hex string */ public static String byteArrayToHexString(byte[] b) { int len = b.length; char[] s = new char[len * 2]; for (int i = 0, j = 0; i < len; i++) { int c = ((int) b[i]) & 0xff; s[j++] = (char) HEXBYTES[c >> 4 & 0xf]; s[j++] = (char) HEXBYTES[c & 0xf]; } return new String(s); } /** * Converts a byte array into an SQL hexadecimal string * * * @param b byte array * * @return hex string */ public static String byteArrayToSQLHexString(byte[] b) { int len = b.length; char[] s = new char[len * 2 + 3]; s[0] = 'X'; s[1] = '\''; int j = 2; for (int i = 0; i < len; i++) { int c = ((int) b[i]) & 0xff; s[j++] = (char) HEXBYTES[c >> 4 & 0xf]; s[j++] = (char) HEXBYTES[c & 0xf]; } s[j] = '\''; return new String(s); } /** * Converts a byte array into a bit string * * * @param bytes byte array * @param bitCount number of bits * @return hex string */ public static String byteArrayToBitString(byte[] bytes, int bitCount) { char[] s = new char[bitCount]; for (int j = 0; j < bitCount; j++) { byte b = bytes[j / 8]; s[j] = BitMap.isSet(b, j % 8) ? '1' : '0'; } return new String(s); } /** * Converts a byte array into an SQL binary string * * * @param bytes byte array * @param bitCount number of bits * @return hex string */ public static String byteArrayToSQLBitString(byte[] bytes, int bitCount) { char[] s = new char[bitCount + 3]; s[0] = 'B'; s[1] = '\''; int pos = 2; for (int j = 0; j < bitCount; j++) { byte b = bytes[j / 8]; s[pos++] = BitMap.isSet(b, j % 8) ? '1' : '0'; } s[pos] = '\''; return new String(s); } /** * Converts a byte array into hexadecimal characters which are written as * ASCII to the given output stream. 
* * @param o output array * @param from offset into output array * @param b input array */ public static void writeHexBytes(byte[] o, int from, byte[] b) { int len = b.length; for (int i = 0; i < len; i++) { int c = ((int) b[i]) & 0xff; o[from++] = HEXBYTES[c >> 4 & 0xf]; o[from++] = HEXBYTES[c & 0xf]; } } public static String byteArrayToString(byte[] b, String charset) { try { return (charset == null) ? new String(b) : new String(b, charset); } catch (Exception e) {} return null; } /** * Hsqldb specific encoding used only for log files. The SQL statements that * need to be written to the log file (input) are Java Unicode strings. * input is converted into a 7bit escaped ASCII string (output)with the * following transformations. All characters outside the 0x20-7f range are * converted to a escape sequence and added to output. If a backslash * character is immdediately followed by 'u', the backslash character is * converted to escape sequence and added to output. All the remaining * characters in input are added to output without conversion. The escape * sequence is backslash, letter u, xxxx, where xxxx is the hex * representation of the character code. (fredt@users)

* * Method based on Hypersonic Code * * @param b output stream to wite to * @param s Java string * @param doubleSingleQuotes boolean */ public static void stringToUnicodeBytes(HsqlByteArrayOutputStream b, String s, boolean doubleSingleQuotes) { if (s == null) { return; } final int len = s.length(); int extras = 0; if (len == 0) { return; } b.ensureRoom(len * 2 + 5); for (int i = 0; i < len; i++) { char c = s.charAt(i); if (c == '\\') { if ((i < len - 1) && (s.charAt(i + 1) == 'u')) { b.writeNoCheck(c); // encode the \ as unicode, so 'u' is ignored b.writeNoCheck('u'); b.writeNoCheck('0'); b.writeNoCheck('0'); b.writeNoCheck('5'); b.writeNoCheck('c'); extras += 5; } else { b.write(c); } } else if ((c >= 0x0020) && (c <= 0x007f)) { b.writeNoCheck(c); // this is 99% if (c == '\'' && doubleSingleQuotes) { b.writeNoCheck(c); extras++; } } else { b.writeNoCheck('\\'); b.writeNoCheck('u'); b.writeNoCheck(HEXBYTES[(c >> 12) & 0xf]); b.writeNoCheck(HEXBYTES[(c >> 8) & 0xf]); b.writeNoCheck(HEXBYTES[(c >> 4) & 0xf]); b.writeNoCheck(HEXBYTES[c & 0xf]); extras += 5; } if (extras > len) { b.ensureRoom(len + extras + 5); extras = 0; } } } // fredt@users 20020522 - fix for 557510 - backslash bug // this legacy bug resulted from forward reading the input when a backslash // was present and manifested itself when a backslash was followed // immdediately by a character outside the 0x20-7f range in a database field. /** * Hsqldb specific decoding used only for log files. This method converts * the 7 bit escaped ASCII strings in a log file back into Java Unicode * strings. See stringToUnicodeBytes() above.

* * Method based on Hypersonic Code * * @param s encoded ASCII string in byte array * @return Java string */ public static String unicodeStringToString(String s) { if ((s == null) || (s.indexOf("\\u") == -1)) { return s; } int len = s.length(); char[] b = new char[len]; int j = 0; for (int i = 0; i < len; i++) { char c = s.charAt(i); if (c == '\\' && i < len - 5) { char c1 = s.charAt(i + 1); if (c1 == 'u') { i++; // 4 characters read should always return 0-15 int k = getNibble(s.charAt(++i)) << 12; k += getNibble(s.charAt(++i)) << 8; k += getNibble(s.charAt(++i)) << 4; k += getNibble(s.charAt(++i)); b[j++] = (char) k; } else { b[j++] = c; } } else { b[j++] = c; } } return new String(b, 0, j); } public static String readUTF(byte[] bytearr, int offset, int length) throws IOException { char[] buf = new char[length]; return readUTF(bytearr, offset, length, buf); } public static String readUTF(byte[] bytearr, int offset, int length, char[] buf) throws IOException { int bcount = 0; int c, char2, char3; int count = 0; while (count < length) { c = (int) bytearr[offset + count]; if (bcount == buf.length) { buf = (char[]) ArrayUtil.resizeArray(buf, length); } if (c > 0) { /* 0xxxxxxx*/ count++; buf[bcount++] = (char) c; continue; } c &= 0xff; switch (c >> 4) { case 12 : case 13 : /* 110x xxxx 10xx xxxx*/ count += 2; if (count > length) { throw new UTFDataFormatException(); } char2 = (int) bytearr[offset + count - 1]; if ((char2 & 0xC0) != 0x80) { throw new UTFDataFormatException(); } buf[bcount++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F)); break; case 14 : /* 1110 xxxx 10xx xxxx 10xx xxxx */ count += 3; if (count > length) { throw new UTFDataFormatException(); } char2 = (int) bytearr[offset + count - 2]; char3 = (int) bytearr[offset + count - 1]; if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) { throw new UTFDataFormatException(); } buf[bcount++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)); break; default : /* 10xx xxxx, 1111 
xxxx */ throw new UTFDataFormatException(); } } // The number of chars produced may be less than length return new String(buf, 0, bcount); } /** * Writes a string to the specified DataOutput using UTF-8 encoding in a * machine-independent manner. *

* @param str a string to be written. * @param out destination to write to * @return The number of bytes written out. */ public static int stringToUTFBytes(String str, HsqlByteArrayOutputStream out) { int strlen = str.length(); int c, count = 0; if (out.count + strlen + 8 > out.buffer.length) { out.ensureRoom(strlen + 8); } char[] arr = str.toCharArray(); for (int i = 0; i < strlen; i++) { c = arr[i]; if (c >= 0x0001 && c <= 0x007F) { out.buffer[out.count++] = (byte) c; count++; } else if (c > 0x07FF) { out.buffer[out.count++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); out.buffer[out.count++] = (byte) (0x80 | ((c >> 6) & 0x3F)); out.buffer[out.count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); count += 3; } else { out.buffer[out.count++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); out.buffer[out.count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); count += 2; } if (out.count + 8 > out.buffer.length) { out.ensureRoom(strlen - i + 8); } } return count; } public static int getUTFSize(String s) { int len = (s == null) ? 0 : s.length(); int l = 0; for (int i = 0; i < len; i++) { int c = s.charAt(i); if ((c >= 0x0001) && (c <= 0x007F)) { l++; } else if (c > 0x07FF) { l += 3; } else { l += 2; } } return l; } /** * Using an output stream, returns a String from an InputStream. * * @param is InputStream to read from * @param encoding character encoding of the string * @throws IOException * @return a Java string */ public static String inputStreamToString(InputStream is, String encoding) throws IOException { HsqlByteArrayOutputStream baOS = new HsqlByteArrayOutputStream(1024); while (true) { int c = is.read(); if (c == -1) { break; } baOS.write(c); } return new String(baOS.getBuffer(), 0, baOS.size(), encoding); } // fredt@users 20020130 - patch 497872 by Nitin Chauhan - use byte[] of exact size /** * Returns the quoted version of the string using the quotechar argument. * doublequote argument indicates whether each instance of quotechar inside * the string is doubled.

* * null string argument returns null. If the caller needs the literal * "NULL" it should created it itself

* * @param s Java string * @param quoteChar character used for quoting * @param extraQuote true if quoteChar itself should be repeated * @return String */ public static String toQuotedString(String s, char quoteChar, boolean extraQuote) { if (s == null) { return null; } int count = extraQuote ? count(s, quoteChar) : 0; int len = s.length(); char[] b = new char[2 + count + len]; int i = 0; int j = 0; b[j++] = quoteChar; for (; i < len; i++) { char c = s.charAt(i); b[j++] = c; if (extraQuote && c == quoteChar) { b[j++] = c; } } b[j] = quoteChar; return new String(b); } /** * Counts Character c in String s * * @param s Java string * @param c character to count * @return int count */ static int count(final String s, final char c) { int pos = 0; int count = 0; if (s != null) { while ((pos = s.indexOf(c, pos)) > -1) { count++; pos++; } } return count; } /** * Converts the string to an HTML representation in the ASCII character set * and appends it to a byte array output stream. * * @param b the output byte array output stream * @param s the input string */ public static void stringToHtmlBytes(HsqlByteArrayOutputStream b, String s) { if (s == null) { return; } final int len = s.length(); char[] chars; if (len == 0) { return; } chars = s.toCharArray(); b.ensureRoom(len); for (int i = 0; i < len; i++) { char c = chars[i]; if (c > 0x007f || c == '"' || c == '&' || c == '<' || c == '>') { int codePoint = Character.codePointAt(chars, i); if (Character.charCount(codePoint) == 2) { i++; } b.ensureRoom(16); b.writeNoCheck('&'); b.writeNoCheck('#'); b.writeBytes(String.valueOf(codePoint)); b.writeNoCheck(';'); } else if (c < 0x0020 ) { b.writeNoCheck(' '); } else { b.writeNoCheck(c); } } } /** * Returns a string representation in UUID form from a binary string. * * UUID string is composed of 8-4-4-4-12 hexadecimal characters. 
* * @param b the byte array * @return UUID string form */ public static String toStringUUID(byte[] b) { char[] chars = new char[36]; int hexIndex; if (b == null) { return null; } if (b.length != 16) { throw new NumberFormatException(); } for (int i = 0, j = 0; i < b.length; ) { hexIndex = (b[i] & 0xf0) >> 4; chars[j++] = (char) HEXBYTES[hexIndex]; hexIndex = b[i] & 0xf; chars[j++] = (char) HEXBYTES[hexIndex]; i++; if (i >= 4 && i <= 10 && (i % 2) == 0) { chars[j++] = '-'; } } return new String(chars); } /** * Returns a byte[] representation in UUID form from a UUID string. * * @param s the UUID string * @return byte array */ public static byte[] toBinaryUUID(String s) { byte[] bytes = new byte[16]; if (s == null) { return null; } if (s.length() != 36) { throw new NumberFormatException(); } for (int i = 0, j = 0; i < bytes.length; ) { char c = s.charAt(j++); int high = getNibble(c); c = s.charAt(j++); bytes[i] = (byte) ((high << 4) + getNibble(c)); i++; if (i >= 4 && i <= 10 && (i % 2) == 0) { c = s.charAt(j++); if (c != '-') {} } } return bytes; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy