com.sleepycat.util.UtfOps Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of je Show documentation
Show all versions of je Show documentation
Berkeley Database Java Edition - build and runtime support.
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
*
*/
package com.sleepycat.util;
/**
 * UTF operations with more flexibility than is provided by DataInput and
 * DataOutput.
 *
 * <p>All methods are static and work on caller-supplied arrays and offsets.
 * Each {@code char} is encoded independently of its neighbours: values
 * {@code 0x0001..0x007F} take one byte, {@code 0x0000} and
 * {@code 0x0080..0x07FF} take two bytes, and every other value takes three
 * bytes. Because {@code 0x0000} is written as two non-zero bytes, a single
 * zero byte can be used as a string terminator.</p>
 *
 * @author Mark Hayes
 */
public class UtfOps {

    /** Shared result for converting the empty string; has no elements to mutate. */
    private static final byte[] EMPTY_BYTES = {};

    /** Shared result for converting a zero-length byte range. */
    private static final String EMPTY_STRING = "";

    /**
     * Returns the byte length of a null terminated UTF string, not including
     * the terminator.
     *
     * @param bytes the data containing the UTF string.
     *
     * @param offset the beginning of the string to measure.
     *
     * @throws IndexOutOfBoundsException if no zero terminator is found.
     *
     * @return the number of bytes.
     */
    public static int getZeroTerminatedByteLength(byte[] bytes, int offset)
        throws IndexOutOfBoundsException {

        int len = 0;
        /* Running off the end of the array raises the documented exception. */
        while (bytes[offset++] != 0) {
            len++;
        }
        return len;
    }

    /**
     * Returns the byte length of the UTF string that would be created by
     * converting the given characters to UTF.
     *
     * @param chars the characters that would be converted.
     *
     * @return the byte length of the equivalent UTF data.
     */
    public static int getByteLength(char[] chars) {
        return getByteLength(chars, 0, chars.length);
    }

    /**
     * Returns the byte length of the UTF string that would be created by
     * converting the given characters to UTF.
     *
     * @param chars the characters that would be converted.
     *
     * @param offset the first character to be converted.
     *
     * @param length the number of characters to be converted.
     *
     * @return the byte length of the equivalent UTF data.
     */
    public static int getByteLength(char[] chars, int offset, int length) {
        int len = 0;
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            final int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                len++;
            } else if (c > 0x07FF) {
                len += 3;
            } else {
                /* 0x0000 and 0x0080..0x07FF. */
                len += 2;
            }
        }
        return len;
    }

    /**
     * Returns the number of characters represented by the given UTF string.
     *
     * @param bytes the UTF string.
     *
     * @return the number of characters.
     *
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     *
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static int getCharLength(byte[] bytes)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        return getCharLength(bytes, 0, bytes.length);
    }

    /**
     * Returns the number of characters represented by the given UTF string.
     *
     * <p>Only the first byte of each sequence is inspected; continuation
     * bytes are skipped without being validated.</p>
     *
     * @param bytes the data containing the UTF string.
     *
     * @param offset the first byte to be converted.
     *
     * @param length the number of bytes to be converted.
     *
     * @return the number of characters.
     *
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     *
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static int getCharLength(byte[] bytes, int offset, int length)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        int charCount = 0;
        final int end = offset + length;
        while (offset < end) {
            final int leadOffset = offset;
            switch ((bytes[offset] & 0xff) >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                offset++;
                break;
            case 12: case 13:
                offset += 2;
                break;
            case 14:
                offset += 3;
                break;
            default:
                throw illegalSequence(leadOffset);
            }
            /*
             * A sequence that extends past the requested range is truncated.
             * Checking here rather than after the loop keeps a negative
             * length returning zero instead of throwing.
             */
            if (offset > end) {
                throw incompleteSequence(leadOffset);
            }
            charCount++;
        }
        return charCount;
    }

    /**
     * Converts byte arrays into character arrays.
     *
     * @param bytes the source byte data to convert
     *
     * @param byteOffset the offset into the byte array at which
     * to start the conversion
     *
     * @param chars the destination array
     *
     * @param charOffset the offset into chars at which to begin the copy
     *
     * @param len the amount of information to copy into chars
     *
     * @param isByteLen if true then len is a measure of bytes, otherwise
     * len is a measure of characters
     *
     * @return the offset of the byte following the last byte consumed.
     *
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     *
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static int bytesToChars(byte[] bytes, int byteOffset,
                                   char[] chars, int charOffset,
                                   int len, boolean isByteLen)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        /* Exclusive end, in bytes or in chars depending on isByteLen. */
        final int end = len + (isByteLen ? byteOffset : charOffset);

        while ((isByteLen ? byteOffset : charOffset) < end) {
            final int leadOffset = byteOffset;
            final int char1 = bytes[byteOffset++] & 0xff;
            switch (char1 >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                chars[charOffset++] = (char) char1;
                break;
            case 12: case 13: {
                /* Never read a continuation byte outside the byte range. */
                if (isByteLen && byteOffset + 1 > end) {
                    throw incompleteSequence(leadOffset);
                }
                /* Sign extension is harmless: only masked bits are used. */
                final int char2 = bytes[byteOffset++];
                if ((char2 & 0xC0) != 0x80) {
                    throw illegalSequence(leadOffset);
                }
                chars[charOffset++] = (char) (((char1 & 0x1F) << 6) |
                                              (char2 & 0x3F));
                break;
            }
            case 14: {
                if (isByteLen && byteOffset + 2 > end) {
                    throw incompleteSequence(leadOffset);
                }
                final int char2 = bytes[byteOffset++];
                final int char3 = bytes[byteOffset++];
                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                    throw illegalSequence(leadOffset);
                }
                chars[charOffset++] = (char) (((char1 & 0x0F) << 12) |
                                              ((char2 & 0x3F) << 6) |
                                              (char3 & 0x3F));
                break;
            }
            default:
                throw illegalSequence(leadOffset);
            }
        }
        return byteOffset;
    }

    /**
     * Converts character arrays into byte arrays.
     *
     * <p>The destination must have room for
     * {@link #getByteLength(char[], int, int)} bytes starting at
     * {@code byteOffset}.</p>
     *
     * @param chars the source character data to convert
     *
     * @param charOffset the offset into the character array at which
     * to start the conversion
     *
     * @param bytes the destination array
     *
     * @param byteOffset the offset into bytes at which to begin the copy
     *
     * @param charLength the length of characters to copy into bytes
     */
    public static void charsToBytes(char[] chars, int charOffset,
                                    byte[] bytes, int byteOffset,
                                    int charLength) {
        final int end = charOffset + charLength;
        for (int i = charOffset; i < end; i++) {
            final int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                bytes[byteOffset++] = (byte) c;
            } else if (c > 0x07FF) {
                bytes[byteOffset++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                bytes[byteOffset++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
            } else {
                /* 0x0000 and 0x0080..0x07FF. */
                bytes[byteOffset++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
            }
        }
    }

    /**
     * Converts byte arrays into strings.
     *
     * @param bytes the source byte data to convert
     *
     * @param offset the offset into the byte array at which
     * to start the conversion
     *
     * @param length the number of bytes to be converted.
     *
     * @return the string.
     *
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     *
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static String bytesToString(byte[] bytes, int offset, int length)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        if (length == 0) {
            return EMPTY_STRING;
        }
        /* First pass sizes the buffer, second pass decodes into it. */
        final int charLen = getCharLength(bytes, offset, length);
        final char[] chars = new char[charLen];
        bytesToChars(bytes, offset, chars, 0, length, true);
        return new String(chars, 0, charLen);
    }

    /**
     * Converts strings to byte arrays.
     *
     * @param string the string to convert.
     *
     * @return the UTF byte array.
     */
    public static byte[] stringToBytes(String string) {
        if (string.length() == 0) {
            return EMPTY_BYTES;
        }
        final char[] chars = string.toCharArray();
        final byte[] bytes = new byte[getByteLength(chars)];
        charsToBytes(chars, 0, bytes, 0, chars.length);
        return bytes;
    }

    /** Builds the exception for a sequence cut short by the end of the data. */
    private static IndexOutOfBoundsException incompleteSequence(int leadOffset) {
        return new IndexOutOfBoundsException(
            "Incomplete UTF sequence starting at byte offset " + leadOffset);
    }

    /** Builds the exception for a malformed lead or continuation byte. */
    private static IllegalArgumentException illegalSequence(int leadOffset) {
        return new IllegalArgumentException(
            "Illegal UTF sequence starting at byte offset " + leadOffset);
    }
}