
// org.seppiko.commons.utils.codec.Base32 Maven / Gradle / Ivy
/*
* Copyright 2023 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.seppiko.commons.utils.codec;
import java.io.Serial;
import java.io.Serializable;
import java.util.Objects;
import org.seppiko.commons.utils.CharUtil;
/**
* Base32 encoding and decoding.
*
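* @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC4648 §6</a>
* @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC4648 §7</a>
* @see <a href="https://en.wikipedia.org/wiki/Base32">Base32</a>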
* @author Leonard Woo
*/
public class Base32 implements BaseNCodec, Serializable {

  @Serial
  private static final long serialVersionUID = -5230113688025284966L;

  /**
   * This array is a lookup table that translates 5-bit positive integer index values into their
   * "Base32 Alphabet" equivalents as specified in Table 3 of RFC 4648.
   */
  private static final byte[] RFC4648_ENCODE_TABLE = {
      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
      '2', '3', '4', '5', '6', '7',
  };

  /**
   * This array is a lookup table that translates Unicode characters drawn from the "Base32
   * Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit positive integer
   * equivalents. Upper- and lower-case letters map to the same value. Characters that are not in
   * the Base32 alphabet but fall within the bounds of the array are translated to -1.
   */
  private static final byte[] RFC4648_DECODE_TABLE = {
      //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
      -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
      -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
      -1, -1, -1, -1, -1,                                             // 5b-5f
      -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
  };

  /**
   * This array is a lookup table that translates 5-bit positive integer index values into their
   * "Base32 Hex Alphabet" equivalents as specified in Table 4 of RFC 4648.
   */
  private static final byte[] RFC4648_HEX_ENCODE_TABLE = {
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
  };

  /**
   * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex
   * Alphabet" (as specified in Table 4 of RFC 4648) into their 5-bit positive integer
   * equivalents. Upper- and lower-case letters map to the same value. Characters that are not in
   * the Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
   */
  private static final byte[] RFC4648_HEX_DECODE_TABLE = {
      //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
      -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
      25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
      -1, -1, -1, -1, -1, -1, -1, -1, -1,                             // 57-5f
      -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
      25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
  };

  /**
   * This array is a lookup table that translates 5-bit positive integer index values into their
   * z-base-32 equivalents. z-base-32 is a Base32 encoding designed by Zooko Wilcox-O'Hearn to be
   * easier for human use and more compact. It includes 1, 8 and 9 but excludes l, v and 2.
   * It also permutes the alphabet so that the easier characters are the ones that occur more
   * frequently.
   */
  private static final byte[] ZBASE32_ENCODE_TABLE = {
      'y', 'b', 'n', 'd', 'r', 'f', 'g', '8', 'e', 'j', 'k', 'm', 'c',
      'p', 'q', 'x', 'o', 't', '1', 'u', 'w', 'i', 's', 'z', 'a', '3',
      '4', '5', 'h', '7', '6', '9',
  };

  /**
   * This array is a lookup table that translates Unicode characters drawn from the "Z-Base32
   * Alphabet" into their 5-bit positive integer equivalents. It is the exact inverse of
   * {@link #ZBASE32_ENCODE_TABLE}; upper-case letters are accepted as aliases of their lower-case
   * form. Characters that are not in the Z-Base32 alphabet (including l, v, 0 and 2) but fall
   * within the bounds of the array are translated to -1.
   */
  private static final byte[] ZBASE32_DECODE_TABLE = {
      //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
      -1, 18, -1, 25, 26, 27, 30, 29,  7, 31, -1, -1, -1, -1, -1, -1, // 30-3f 1-9
      -1, 24,  1, 12,  3,  8,  5,  6, 28, 21,  9, 10, -1, 11,  2, 16, // 40-4f A-O (L excluded)
      13, 14,  4, 22, 17, 19, -1, 20, 15,  0, 23,                     // 50-5a P-Z (V excluded)
      -1, -1, -1, -1, -1,                                             // 5b-5f
      -1, 24,  1, 12,  3,  8,  5,  6, 28, 21,  9, 10, -1, 11,  2, 16, // 60-6f a-o (l excluded)
      13, 14,  4, 22, 17, 19, -1, 20, 15,  0, 23,                     // 70-7a p-z (v excluded)
  };

  /**
   * The most widely used Base32 alphabet is defined in RFC 4648. It uses an alphabet of A–Z,
   * followed by 2–7. The digits 0, 1 and 8 are skipped due to their similarity with the letters O,
   * I and B (thus "2" has a decimal value of 26).
   */
  public static final Base32 RFC4648 = new Base32(RFC4648_ENCODE_TABLE, RFC4648_DECODE_TABLE);

  /**
   * The "Extended Hex" Base32 alphabet defined in RFC 4648 §7. It uses the digits 0–9,
   * followed by A–V.
   */
  public static final Base32 RFC4648_HEX =
      new Base32(RFC4648_HEX_ENCODE_TABLE, RFC4648_HEX_DECODE_TABLE);

  /**
   * z-base-32 is a Base32 encoding designed by Zooko Wilcox-O'Hearn to be easier for human use
   * and more compact. It includes 1, 8 and 9 but excludes l, v and 2.
   * It also permutes the alphabet so that the easier characters are the ones that occur more
   * frequently.
   *
   * @see "human-oriented base-32 encoding (z-base-32), Zooko Wilcox-O'Hearn"
   */
  public static final Base32 ZBASE32 = new Base32(ZBASE32_ENCODE_TABLE, ZBASE32_DECODE_TABLE);

  /** base32 encode table */
  private final byte[] encodeTable;

  /** base32 decode table */
  private final byte[] decodeTable;

  /** base32 padding char */
  private final char padding;

  /**
   * Creates a codec for one alphabet.
   *
   * @param encodeTable 32-entry table mapping a 5-bit value to its alphabet character.
   * @param decodeTable table indexed by character code mapping back to the 5-bit value, or -1.
   * @throws IllegalArgumentException if {@code encodeTable} does not hold exactly 32 entries.
   */
  private Base32(byte[] encodeTable, byte[] decodeTable) {
    if (encodeTable.length != 32) {
      throw new IllegalArgumentException("base32 alphabet must have 32 chars.");
    }
    this.encodeTable = encodeTable;
    this.decodeTable = decodeTable;
    this.padding = CharUtil.EQUALS_SIGN;
  }

  /**
   * Base32-encode the given data and return a newly allocated String with the result.
   *
   * <p>The output is padded with the padding character until its length is a multiple of
   * {@code BYTES_PER_ENCODED_BLOCK}.
   *
   * @param source the data to encode.
   * @return a newly allocated String with the result.
   * @throws NullPointerException data is {@code null}.
   */
  @Override
  public String encodeString(final byte[] source) {
    Objects.requireNonNull(source, "source must be not null.");

    // Capacity hint only; slightly over-estimates so padding never forces a resize.
    int maxEncodeLength = ((source.length + 7) * BYTES_PER_ENCODED_BLOCK / BITS_PER_ENCODED_BYTE);
    StringBuilder result = new StringBuilder(maxEncodeLength);

    // idx is the bit offset inside source[i] at which the next 5-bit group starts.
    int idx = 0;
    int digit;
    for (int i = 0; i < source.length;) {
      int currByte = source[i] & 0xFF; // treat the byte as unsigned
      if (idx > 3) {
        // The group straddles the current and the next byte (missing next byte counts as 0).
        int nextByte = ((i + 1) < source.length) ? (source[i + 1] & 0xFF) : 0;
        digit = (int) (currByte & (MASK_8BITS >> idx));
        idx = (idx + BIT_WIDTH) % BYTES_PER_ENCODED_BLOCK;
        digit <<= idx;
        digit |= (nextByte >> (BYTES_PER_ENCODED_BLOCK - idx));
        i++;
      } else {
        // The whole group lies inside the current byte.
        digit = (int) ((currByte >> (BYTES_PER_ENCODED_BLOCK - (idx + BIT_WIDTH))) & MASK_5BITS);
        idx = (idx + BIT_WIDTH) % BYTES_PER_ENCODED_BLOCK;
        if (idx == 0) {
          // Current byte fully consumed.
          i++;
        }
      }
      result.append((char) encodeTable[digit]);
    }

    while ((result.length() % BYTES_PER_ENCODED_BLOCK) != 0) {
      result.append(padding);
    }
    return result.toString();
  }

  /**
   * Decode the Base32-encoded data in input and return the data in a new byte array.
   *
   * <p>Characters that are not part of this codec's alphabet (including the padding character)
   * are skipped silently. If the input carries too few characters before the padding to form a
   * single byte, an empty array is returned.
   *
   * @param str the data to decode.
   * @return the data in a new byte array.
   * @throws IllegalArgumentException data is not divisible by 8.
   * @throws NullPointerException data is {@code null}.
   */
  @Override
  public byte[] decode(final String str) throws IllegalArgumentException, NullPointerException {
    Objects.requireNonNull(str, "input string must be not null.");
    if ((str.length() % BYTES_PER_ENCODED_BLOCK) != 0) {
      throw new IllegalArgumentException("input string length must be divisible by 8.");
    }

    // For padded input only the characters before the first padding char carry data.
    int len = str.endsWith(String.valueOf(padding))
        ? str.indexOf(padding) * BITS_PER_ENCODED_BYTE / BYTES_PER_ENCODED_BLOCK
        : str.length() * BITS_PER_ENCODED_BYTE / BYTES_PER_ENCODED_BLOCK;
    byte[] result = new byte[len];
    if (len == 0) {
      // Nothing to write; without this guard the loop below would index an empty array
      // for inputs such as "A=======".
      return result;
    }

    // idx is the bit offset inside result[offset] where the next 5-bit group is written.
    for (int i = 0, idx = 0, offset = 0; i < str.length(); i++) {
      int lookup = str.charAt(i);
      /* Skip chars outside the lookup table */
      if (lookup >= decodeTable.length) {
        continue;
      }
      int digit = decodeTable[lookup];
      /* If this digit is not in the table, ignore it */
      if (digit < 0) {
        continue;
      }

      if (idx <= 3) {
        // The group fits entirely into the current output byte.
        idx = (idx + BIT_WIDTH) % BYTES_PER_ENCODED_BLOCK;
        if (idx == 0) {
          // Group fills the low bits and completes the byte.
          result[offset] |= digit;
          offset++;
          if (offset >= result.length) {
            break;
          }
        } else {
          result[offset] |= digit << (BYTES_PER_ENCODED_BLOCK - idx);
        }
      } else {
        // The group straddles two output bytes: high part finishes the current byte,
        // low part starts the next one.
        idx = (idx + BIT_WIDTH) % BYTES_PER_ENCODED_BLOCK;
        result[offset] |= (digit >>> idx);
        offset++;
        if (offset >= result.length) {
          break;
        }
        result[offset] |= digit << (BYTES_PER_ENCODED_BLOCK - idx);
      }
    }
    return result;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy