// com.novell.ldap.util.Base64 (source listing; Maven / Gradle / Ivy artifact page)
/* **************************************************************************
* $OpenLDAP: pkg/jldap/com/novell/ldap/util/Base64.java,v 1.4 2002/11/11 15:39:25 vtag Exp $
*
* Copyright (C) 2002 Novell, Inc. All Rights Reserved.
*
* THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND
* TREATIES. USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT
* TO VERSION 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS
* AVAILABLE AT HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE"
* IN THE TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION
* OF THIS WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP
* PUBLIC LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT
* THE PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY.
******************************************************************************/
package com.novell.ldap.util;
import java.io.UnsupportedEncodingException;
/**
 * The Base64 utility class performs base64 encoding and decoding, and offers
 * LDIF-safety and UTF-8 validity checks for raw values.
 *
 * The Base64 Content-Transfer-Encoding is designed to represent
 * arbitrary sequences of octets in a form that need not be humanly
 * readable. The encoding and decoding algorithms are simple, but the
 * encoded data are consistently only about 33 percent larger than the
 * unencoded data. The base64 encoding algorithm is defined by
 * RFC 2045.
 *
 * All members are static; the class cannot be instantiated.
 */
public class Base64
{
    /**
     * Conversion table for encoding to base64.
     *
     * emap is a six-bit value to base64 (8-bit) conversion table.
     * For example, the 6-bit value 15 is mapped to 0x50, which is the
     * ASCII letter 'P', i.e. the letter P is the base64 encoded character
     * that represents the 6-bit value 15.
     */
    private static final char emap[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // A-H;      0 - 7
        'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // I-P;      8 -15
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // Q-X;      16-23
        'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // YZ, a-f;  24-31
        'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // g-n;      32-39
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // o-v;      40-47
        'w', 'x', 'y', 'z', '0', '1', '2', '3', // w-z, 0-3; 48-55
        '4', '5', '6', '7', '8', '9', '+', '/'  // 4-9, + /; 56-63
    };

    /**
     * Conversion table for decoding from base64.
     *
     * dmap is a base64 (8-bit) to six-bit value conversion table.
     * For example the ASCII character 'P' has a value of 80.
     * The value in the 80th position of the table is 0x0f or 15.
     * 15 is the original 6-bit value that the letter 'P' represents.
     *
     * Note: about half of the values in the table are only place holders
     * (0x00), so a character outside the base64 alphabet, including the
     * pad character '=', decodes as the 6-bit value 0. The table has 128
     * entries, so it can only be indexed with characters below 128.
     */
    private static final byte dmap[] = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0 -7
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8 -15
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 16-23
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 24-31
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 32-39
        0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x3f, // 40-47   '   +   /'
        0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // 48-55   '01234567'
        0x3c, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 56-63   '89      '
        0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 64-71   ' ABCDEFG'
        0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, // 72-79   'HIJKLMNO'
        0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, // 80-87   'PQRSTUVW'
        0x17, 0x18, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, // 88-95   'XYZ     '
        0x00, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // 96-103  ' abcdefg'
        0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, // 104-111 'hijklmno'
        0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, // 112-119 'pqrstuvw'
        0x31, 0x32, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00  // 120-127 'xyz     '
    };

    /**
     * Default constructor, don't allow instances of the
     * utility class to be created.
     */
    private Base64()
    {
    }

    /**
     * Encodes the UTF-8 bytes of the specified String into a base64
     * encoded String object.
     *
     * @param inputString The String object to be encoded.
     *
     * @return a String containing the encoded value of the input.
     *
     * @throws RuntimeException if the JVM does not support UTF-8.
     */
    public static final String encode(String inputString)
    {
        try {
            return encode(inputString.getBytes("UTF-8"));
        } catch (UnsupportedEncodingException ue) {
            // The requested charset is UTF-8, so report that one and keep
            // the cause for diagnosis.
            throw new RuntimeException(
                    "UTF-8 String encoding not supported by JVM", ue);
        }
    }

    /**
     * Encodes the specified bytes into a base64 String.
     * Every three input bytes produce four output characters; a trailing
     * group of one or two bytes is padded with '=' to four characters.
     *
     * @param inputBytes the byte array to be encoded.
     *
     * @return a String containing the base64 encoded data; the empty
     *         String if inputBytes is empty.
     */
    public static final String encode(byte[] inputBytes)
    {
        int len = inputBytes.length;
        if (len == 0) {
            // No data, return no data.
            return "";
        }

        // Number of three-byte groups, counting a trailing partial group.
        char[] encodedChars = new char[((len + 2) / 3) * 4];

        // map of decoded and encoded bits
        //     bits in 3 decoded bytes:   765432  107654  321076  543210
        //     bits in 4 encoded chars: 76543210765432107654321076543210
        //       plain "AAA":   010000  010100  000101  000001  ->  "QUFB"
        // Encoded bits that have no corresponding decoded bits are zero.
        int in = 0;
        int out = 0;
        for (; in + 3 <= len; in += 3, out += 4) {
            int b0 = inputBytes[in] & 0xff;
            int b1 = inputBytes[in + 1] & 0xff;
            int b2 = inputBytes[in + 2] & 0xff;
            encodedChars[out]     = emap[b0 >> 2];
            encodedChars[out + 1] = emap[((b0 & 0x03) << 4) | (b1 >> 4)];
            encodedChars[out + 2] = emap[((b1 & 0x0f) << 2) | (b2 >> 6)];
            encodedChars[out + 3] = emap[b2 & 0x3f];
        }

        int remaining = len - in;
        if (remaining == 1) {
            // one trailing byte: two data characters and two paddings
            int b0 = inputBytes[in] & 0xff;
            encodedChars[out]     = emap[b0 >> 2];
            encodedChars[out + 1] = emap[(b0 & 0x03) << 4];
            encodedChars[out + 2] = '=';
            encodedChars[out + 3] = '=';
        } else if (remaining == 2) {
            // two trailing bytes: three data characters and one padding
            int b0 = inputBytes[in] & 0xff;
            int b1 = inputBytes[in + 1] & 0xff;
            encodedChars[out]     = emap[b0 >> 2];
            encodedChars[out + 1] = emap[((b0 & 0x03) << 4) | (b1 >> 4)];
            encodedChars[out + 2] = emap[(b1 & 0x0f) << 2];
            encodedChars[out + 3] = '=';
        }
        return new String(encodedChars);
    }

    /**
     * Decodes the input base64 encoded String.
     * The resulting binary data is returned as an array of bytes.
     *
     * @param encodedString The base64 encoded String object.
     *
     * @return The decoded byte array.
     *
     * @throws RuntimeException if the length of the input is not a
     *         multiple of four.
     */
    public static final byte[] decode(String encodedString)
    {
        return decode(encodedString.toCharArray());
    }

    /**
     * Decodes the input base64 encoded array of characters.
     * The resulting binary data is returned as an array of bytes.
     *
     * Characters below 128 that are not in the base64 alphabet are not
     * rejected; they decode as the 6-bit value 0. A character of 128 or
     * above is outside the decoding table and causes an
     * ArrayIndexOutOfBoundsException.
     *
     * @param encodedChars The character array containing the base64
     *                     encoded data.
     *
     * @return A byte array object containing decoded bytes.
     *
     * @throws RuntimeException if the length of the input is not a
     *         multiple of four.
     */
    public static final byte[] decode(char[] encodedChars)
    {
        int ecLen = encodedChars.length;
        if (ecLen == 0) {
            return new byte[0];
        }
        // the number of encoded chars should be a multiple of 4
        if ((ecLen % 4) != 0) {
            throw new RuntimeException("com.novell.ldap.ldif_dsml."
                + "Base64Decoder: decode: mal-formatted encode value");
        }
        return decodeGroups(encodedChars);
    }

    /**
     * Decodes a base64 encoded StringBuffer.
     * Decodes all or part of the input base64 encoded StringBuffer, each
     * Character value representing a base64 character. The resulting
     * binary data is returned as an array of bytes.
     *
     * @param encodedSBuf The StringBuffer object that contains base64
     *                    encoded data.
     * @param start       The start index of the base64 encoded data.
     * @param end         The end index + 1 of the base64 encoded data.
     *
     * @return The decoded byte array; an empty array if the buffer or the
     *         selected range is empty.
     *
     * @throws RuntimeException if the length of the selected range is not
     *         a multiple of four.
     * @throws StringIndexOutOfBoundsException if start and end do not
     *         describe a valid range of a non-empty buffer.
     */
    public static final byte[] decode(StringBuffer encodedSBuf, int start, int end)
    {
        int esbLen = end - start; // length of the encoded part
        // An empty range must be handled here as well as an empty buffer:
        // otherwise the padding test below would inspect characters that
        // lie before the requested range.
        if (encodedSBuf.length() == 0 || esbLen == 0) {
            return new byte[0];
        }
        // the number of encoded chars should be a multiple of 4
        if ((esbLen % 4) != 0) {
            throw new RuntimeException("com.novell.ldap.ldif_dsml."
                + "Base64Decoder: decode error: mal-formatted encode value");
        }
        return decodeGroups(encodedSBuf.substring(start, end).toCharArray());
    }

    /**
     * Decodes complete four-character base64 groups.
     *
     * Every four characters, except the last group if it has the form
     * '**==' or '***=', are decoded into three bytes.
     *
     * map of encoded and decoded bits, no padding:
     *     bits in 4 encoded chars: 76543210 76543210 76543210 76543210
     *     bits in 3 decoded bytes:   765432   107654   321076   543210
     *     "QUFB" decodes to "AAA", "QUE=" to "AA" and "QQ==" to "A".
     *
     * @param chars the encoded characters; the caller guarantees that the
     *              length is a non-zero multiple of four.
     *
     * @return the decoded bytes.
     */
    private static byte[] decodeGroups(char[] chars)
    {
        int groups = chars.length / 4;
        int last = chars.length - 1;

        // Only the final group may carry padding: '***=' or '**=='.
        int pads = 0;
        if (chars[last] == '=') {
            pads = (chars[last - 1] == '=') ? 2 : 1;
        }

        byte[] decodedBytes = new byte[groups * 3 - pads];
        int in = 0;
        int out = 0;
        for (int g = 1; g <= groups; g++, in += 4, out += 3) {
            boolean lastGroup = (g == groups);
            int c0 = dmap[chars[in]];
            int c1 = dmap[chars[in + 1]];
            // first byte: six bits of c0 and the top two bits of c1
            decodedBytes[out] = (byte) ((c0 << 2) | ((c1 & 0x30) >> 4));
            if (lastGroup && pads == 2) {
                break;
            }
            int c2 = dmap[chars[in + 2]];
            // second byte: low four bits of c1 and the top four bits of c2
            decodedBytes[out + 1] =
                (byte) (((c1 & 0x0f) << 4) | ((c2 & 0x3c) >> 2));
            if (lastGroup && pads == 1) {
                break;
            }
            int c3 = dmap[chars[in + 3]];
            // third byte: low two bits of c2 and all six bits of c3
            decodedBytes[out + 2] = (byte) (((c2 & 0x03) << 6) | (c3 & 0x3f));
        }
        return decodedBytes;
    }

    /**
     * Checks if the input byte array contains only safe values, that is,
     * the data does not need to be encoded for use with LDIF.
     * The rules for checking safety are based on the rules for LDIF
     * (LDAP Data Interchange Format) per RFC 2849. The data does
     * not need to be encoded if all the following are true:
     *
     * The data cannot start with the following byte values:
     *
     *     00 (NUL), 10 (LF), 13 (CR), 32 (SPACE), 58 (:), 60 (&lt;),
     *     any value greater than 127 (negative for a byte value)
     *
     * The data cannot contain any of the following byte values:
     *
     *     00 (NUL), 10 (LF), 13 (CR),
     *     any value greater than 127 (negative for a byte value)
     *
     * The data cannot end with a space.
     *
     * @param bytes the bytes to be checked.
     *
     * @return true if encoding not required for LDIF; an empty array is
     *         reported as safe.
     */
    public static final boolean isLDIFSafe(byte[] bytes)
    {
        int len = bytes.length;
        if (len == 0) {
            return true;
        }

        // unsafe if first character is a NON-SAFE-INIT-CHAR
        int first = bytes[0];
        if (first == 0x00        // NUL
            || first == 0x0A     // line feed
            || first == 0x0D     // carriage return
            || first == 0x20     // space(' ')
            || first == 0x3A     // colon(':')
            || first == 0x3C     // less-than('<')
            || first < 0) {      // non ascii (>127 is negative)
            return false;
        }

        // unsafe if last character is a space
        if (bytes[len - 1] == ' ') {
            return false;
        }

        // unsafe if any following character is not a SAFE-CHAR
        for (int i = 1; i < len; i++) {
            int testChar = bytes[i];
            if (testChar == 0x00       // NUL
                || testChar == 0x0A    // line feed
                || testChar == 0x0D    // carriage return
                || testChar < 0) {     // non ascii (>127 is negative)
                return false;
            }
        }
        return true;
    }

    /**
     * Checks if the input String contains only safe values, that is,
     * the data does not need to be encoded for use with LDIF.
     * The String is converted to its UTF-8 bytes and checked with the
     * same rules as {@link #isLDIFSafe(byte[])}, which are based on
     * RFC 2849: restricted first character, no NUL, LF, CR or value
     * greater than 127 anywhere, and no trailing space.
     *
     * @param str the String to be checked.
     *
     * @return true if encoding not required for LDIF
     *
     * @throws RuntimeException if the JVM does not support UTF-8.
     */
    public static final boolean isLDIFSafe(String str)
    {
        try {
            return isLDIFSafe(str.getBytes("UTF-8"));
        } catch (UnsupportedEncodingException ue) {
            throw new RuntimeException(
                    "UTF-8 String encoding not supported by JVM", ue);
        }
    }

    /* **************UTF-8 Validation methods and members*******************
     * The following text is taken from draft-yergeau-rfc2279bis-02 and
     * explains UTF-8 encoding:
     *
     * In UTF-8, characters are encoded using sequences of 1 to 6 octets.
     * If the range of character numbers is restricted to U+0000..U+10FFFF
     * (the UTF-16 accessible range), then only sequences of one to four
     * octets will occur. The only octet of a "sequence" of one has the
     * higher-order bit set to 0, the remaining 7 bits being used to encode
     * the character number. In a sequence of n octets, n>1, the initial
     * octet has the n higher-order bits set to 1, followed by a bit set to
     * 0. The remaining bit(s) of that octet contain bits from the number
     * of the character to be encoded. The following octet(s) all have the
     * higher-order bit set to 1 and the following bit set to 0, leaving 6
     * bits in each to contain bits from the character to be encoded.
     *
     * The table below summarizes the format of these different octet types.
     * The letter x indicates bits available for encoding bits of the
     * character number.
     *
     * Char. number range  |        UTF-8 octet sequence
     *    (hexadecimal)    |              (binary)
     * --------------------+---------------------------------------------
     * 0000 0000-0000 007F | 0xxxxxxx
     * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
     * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
     * 0001 0000-001F FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 0020 0000-03FF FFFF | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 0400 0000-7FFF FFFF | 1111110x 10xxxxxx ... 10xxxxxx
     */

    /**
     * Given the first byte in a sequence, getByteCount returns the number of
     * additional bytes in a UTF-8 character sequence (not including the first
     * byte).
     *
     * @param b The first byte in a UTF-8 character sequence.
     *
     * @return the number of additional bytes in a UTF-8 character sequence,
     *         or -1 if b cannot start a sequence.
     */
    private static int getByteCount(byte b)
    {
        // 0xxxxxxx: a single-octet character. NUL (0x00) belongs to this
        // range, so the comparison must include zero.
        if (b >= 0) {
            return 0;
        }
        if ((b & 0xE0) == 0xC0) {
            return 1; // one additional byte (2 bytes total)
        }
        if ((b & 0xF0) == 0xE0) {
            return 2; // two additional bytes (3 bytes total)
        }
        if ((b & 0xF8) == 0xF0) {
            return 3; // three additional bytes (4 bytes total)
        }
        if ((b & 0xFC) == 0xF8) {
            return 4; // four additional bytes (5 bytes total)
        }
        // 1111110x: the low bit is payload, so it is masked out; this
        // accepts both 0xFC and 0xFD as lead bytes.
        if ((b & 0xFE) == 0xFC) {
            return 5; // five additional bytes (6 bytes total)
        }
        return -1;
    }

    /**
     * Bit masks used to determine if the value of a UTF-8 byte sequence
     * is less than the minimum value.
     *
     * If the value of a byte sequence is less than the minimum value then
     * the number should be encoded in fewer bytes and is invalid. For
     * example, if the first byte indicates that a sequence has three bytes,
     * then the top five payload bits cannot all be zero. The array is
     * indexed by the number of additional bytes, i.e. one less than the
     * number of bytes in a sequence; element [n][0] is applied to the first
     * byte and element [n][1] to the second byte.
     * A validity test for a three byte sequence could be:
     *
     *     if ((lowerBoundMask[2][0] & bytes[0]) != 0 ||
     *         (lowerBoundMask[2][1] & bytes[1]) != 0) {
     *         then the value is above the minimum and is valid.
     *     }
     */
    private static final byte lowerBoundMask[][] = new byte[][]
    {
        {0, 0},       // single byte: nothing to check
        {0x1E, 0x00}, // 2 bytes: top four payload bits, all in byte one
        {0x0F, 0x20}, // 3 bytes: top five payload bits (4 + 1)
        {0x07, 0x30}, // 4 bytes: top five payload bits (3 + 2)
        {0x03, 0x38}, // 5 bytes: top five payload bits (2 + 3)
        {0x01, 0x3C}  // 6 bytes: top five payload bits (1 + 4)
    };

    /** mask to AND with a continuation byte: should equal continuationResult */
    private static final byte continuationMask = (byte) 0xC0;

    /** expected result of ANDing a continuation byte with continuationMask */
    private static final byte continuationResult = (byte) 0x80;

    /**
     * Determines if an array of bytes contains only valid UTF-8 characters.
     *
     * UTF-8 is the standard encoding for LDAP strings. If a value contains
     * data that is not valid UTF-8 then data is lost converting the
     * value to a Java String.
     *
     * In addition, Java Strings currently use UCS2 (Unicode Code Standard
     * 2-byte characters). UTF-8 can be encoded as UCS2 and UCS4 (4-byte
     * characters). Some valid UTF-8 characters cannot be represented as UCS2
     * characters. To determine if all UTF-8 sequences can be encoded into
     * UCS2 characters (a Java String), specify the isUCS2Only parameter
     * as true.
     *
     * The check covers the lead byte, the minimum value for the sequence
     * length (overlong forms are rejected) and the 10xxxxxx pattern of
     * every continuation byte, following the one to six octet table above.
     *
     * @param array An array of bytes that are to be tested for valid UTF-8
     *              encoding.
     *
     * @param isUCS2Only true if the UTF-8 values must be restricted to fit
     *                   within UCS2 encoding (2 bytes)
     *
     * @return true if all values in the byte array are valid UTF-8
     *         sequences. If isUCS2Only is true, the method returns false
     *         if a UTF-8 sequence generates any character that cannot be
     *         represented as a UCS2 character (Java String)
     */
    public static boolean isValidUTF8(byte[] array, boolean isUCS2Only)
    {
        int index = 0;
        while (index < array.length) {
            int count = getByteCount(array[index]);
            if (count == 0) {
                // anything that qualifies as count=0 is valid UTF-8
                index++;
                continue;
            }

            /* Any count that puts us out of bounds for the index is
             * invalid. Valid UCS2 characters can only have 2 additional
             * bytes. (three total) */
            if (count == -1 || index + count >= array.length
                    || (isUCS2Only && count >= 3)) {
                return false;
            }

            /* Tests if the first and second byte are below the minimum bound */
            if ((lowerBoundMask[count][0] & array[index]) == 0
                    && (lowerBoundMask[count][1] & array[index + 1]) == 0) {
                return false;
            }

            /* testing continuation on the second and following bytes */
            for (int i = 1; i <= count; i++) {
                if ((array[index + i] & continuationMask) != continuationResult) {
                    return false;
                }
            }
            index += count + 1;
        }
        return true;
    }
}