
com.pippsford.encoding.GenericBase32 Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of common-utils Show documentation
Show all versions of common-utils Show documentation
Code that seems to me to have utility across multiple projects
The newest version!
package com.pippsford.encoding;
import java.util.Arrays;
import javax.annotation.Nullable;
import javax.annotation.concurrent.ThreadSafe;
/**
 * Base 32 encoding as defined in RFC4648. Five bytes of binary data become eight characters.
 *
 * <p>The alphabet, padding character, preferred case and padding policy are supplied at construction. Decoding
 * accepts both upper and lower case forms of alphabetic characters in the alphabet.
 *
 * @author Simon Greatrix
 */
@ThreadSafe
// Suppressing "cognitive complexity" warnings as data validation requires a lot of if-then constructs
// which leads to a high cognitive complexity. Sometimes there are just a lot of rules to check.
@SuppressWarnings("squid:S3776")
public class GenericBase32 implements Converter {

  /**
   * Value matchers for overflow detection, indexed by the number of characters in the final (partial) block, i.e. the
   * text length modulo 8.
   *
   * <p>An entry of -1 marks a length that can never be valid (1, 3 or 6 trailing characters). Any other entry is the
   * row of {@link #values} which holds the bits of the last character that spill into a byte that does not exist;
   * those bits must be zero. Row 12 is all zeros, so a complete block always passes.
   */
  static final int[] OVERFLOWS = {12, -1, 2, -1, 5, 7, -1, 10};

  /**
   * Number of characters needed to carry 0, 1, 2, 3 or 4 bytes left over after the last full five-byte block.
   */
  private static final int[] TAIL_LENGTHS = {0, 2, 4, 5, 7};

  /**
   * Characters for each of the values 0 to 31.
   */
  final char[] chars;

  /**
   * Must padding be applied on encoding?
   */
  final boolean mustPad;

  /**
   * The padding character.
   */
  final char pad;

  /**
   * Is upper case preferred?
   */
  final boolean preferUpper;

  /**
   * Contributions of each character to each of the five bytes in a block. Each byte has two or three contributing
   * characters, giving twelve rows indexed by ASCII character; -1 in rows 0 to 11 marks a character that is not part
   * of the alphabet. The final row (index 12) is all zero as it makes some code easier.
   */
  final byte[][] values = new byte[13][128];


  /**
   * Create a new generic Base 32 converter.
   *
   * @param chars       the characters to use, of which the first 32 are used. Must be all of one case and all less
   *                    than 128.
   * @param pad         the padding character, if any
   * @param preferUpper if true, prefer upper case characters
   * @param mustPad     whether encoding must be padded
   *
   * @throws IllegalArgumentException if fewer than 32 characters are supplied, or one of the first 32 is not ASCII
   */
  public GenericBase32(char[] chars, char pad, boolean preferUpper, boolean mustPad) {
    // Validate before building anything, so a bad alphabet produces a meaningful error rather than an
    // ArrayIndexOutOfBoundsException half-way through table construction.
    if (chars.length < 32) {
      throw new IllegalArgumentException(
          "A Base 32 alphabet requires 32 characters, but only " + chars.length + " were provided.");
    }
    for (int i = 0; i < 32; i++) {
      if (chars[i] >= 128) {
        throw new IllegalArgumentException(
            "Alphabet character 0x" + Integer.toHexString(chars[i]) + " at position " + i + " is not ASCII.");
      }
    }

    this.chars = chars.clone();
    this.pad = pad;
    this.preferUpper = preferUpper;
    this.mustPad = mustPad;

    // Every character starts out as "not in the alphabet".
    for (int i = 0; i < 12; i++) {
      Arrays.fill(values[i], (byte) -1);
    }

    // Not strictly necessary as Java initialises to zero.
    Arrays.fill(values[12], (byte) 0);

    // Eight 5-bit characters (c0..c7) map onto five bytes (b0..b4) as:
    //
    //   b0 = c0[4..0] c1[4..2]             rows 0, 1
    //   b1 = c1[1..0] c2[4..0] c3[4]       rows 2, 3, 4
    //   b2 = c3[3..0] c4[4..1]             rows 5, 6
    //   b3 = c4[0]    c5[4..0] c6[4..3]    rows 7, 8, 9
    //   b4 = c6[2..0] c7[4..0]             rows 10, 11
    //
    // Each row holds the character's bits already shifted into position within the target byte.
    for (int i = 0; i < 32; i++) {
      int j = this.chars[i];
      values[0][j] = (byte) (i << 3);
      values[1][j] = (byte) ((i & 0b11100) >> 2);
      values[2][j] = (byte) ((i & 0b00011) << 6);
      values[3][j] = (byte) (i << 1);
      values[4][j] = (byte) ((i & 0b10000) >> 4);
      values[5][j] = (byte) ((i & 0b01111) << 4);
      values[6][j] = (byte) ((i & 0b11110) >> 1);
      values[7][j] = (byte) ((i & 0b00001) << 7);
      values[8][j] = (byte) (i << 2);
      values[9][j] = (byte) ((i & 0b11000) >> 3);
      values[10][j] = (byte) ((i & 0b00111) << 5);
      values[11][j] = (byte) i;
    }

    // Encode the other case, so decoding is case-insensitive: every letter in the alphabet lends its values to
    // its opposite-case twin (upper and lower case ASCII letters differ by 32).
    for (int i = 0; i < 12; i++) {
      byte[] row = values[i];
      for (int j = 'a'; j <= 'z'; j++) {
        byte v = row[j];
        if (v != -1) {
          row[j - 32] = v;
        }
      }
      for (int j = 'A'; j <= 'Z'; j++) {
        byte v = row[j];
        if (v != -1) {
          row[j + 32] = v;
        }
      }
    }
  }


  /**
   * Clean up text so that it is a valid encoding: characters outside the alphabet are dropped, letters are converted
   * to the preferred case, the text is extended with the zero character until it has a valid length with no trailing
   * bits, and padding is applied if this converter requires it.
   *
   * @param text the text to clean (not modified by this method)
   *
   * @return the cleaned text, or null if the input was null
   */
  @Nullable
  @Override
  public char[] clean(char[] text) {
    if (text == null) {
      return null;
    }

    char[] buf = new char[text.length];
    int pos = 0;
    for (char ch : text) {
      // Skip anything that is not in the alphabet. This includes any existing padding and whitespace.
      if (ch >= 128 || values[11][ch] == -1) {
        continue;
      }

      if (preferUpper) {
        // convert to upper case
        if ('a' <= ch && ch <= 'z') {
          ch -= 32;
        }
      } else {
        // convert to lower case
        if ('A' <= ch && ch <= 'Z') {
          ch += 32;
        }
      }
      buf = TextToByte.append(buf, pos++, ch);
    }

    int r = pos % 8;
    char last = (pos > 0) ? buf[pos - 1] : chars[0];

    // Try to repair an invalid length or any trailing bits by adding zeros. The check must always be made against
    // the character that is currently last: once a zero has been appended the original character's spare bits are
    // absorbed into a data byte and must no longer influence how many more zeros are added.
    while (OVERFLOWS[r] == -1 || values[OVERFLOWS[r]][last] != 0) {
      last = chars[0];
      buf = TextToByte.append(buf, pos++, last);
      r = (r + 1) % 8;
    }

    // Fill the final block with padding if required.
    if (mustPad && r != 0) {
      for (; r < 8; r++) {
        buf = TextToByte.append(buf, pos++, pad);
      }
    }
    return TextToByte.trim(buf, pos);
  }


  /**
   * Decode Base 32 text into binary data. Trailing padding characters are ignored, and letters are accepted in either
   * case.
   *
   * <p>NOTE: the input array is handed to {@code TextToByte.removeWhitespaceInPlace}, so callers should presumably
   * expect its contents to be modified.
   *
   * @param text the text to decode
   *
   * @return the decoded bytes, or null if the input was null
   *
   * @throws IllegalArgumentException if the text has an impossible length, contains a character which is not in the
   *                                  alphabet, or has non-zero bits after the last complete byte
   */
  @Nullable
  @Override
  public byte[] decode(char[] text) {
    if (text == null) {
      return null;
    }

    int end = TextToByte.removeWhitespaceInPlace(text);
    while ((end > 0) && (text[end - 1] == pad)) {
      end--;
    }
    if (end == 0) {
      return new byte[0];
    }

    // Validate length after padding removed.
    final int rem = end & 0x7;
    if (OVERFLOWS[rem] == -1) {
      throw new IllegalArgumentException(
          "Input text has invalid length of " + end + ": " + new String(text));
    }

    // Every 8 characters produces 5 bytes; a partial block produces floor(5 * rem / 8) bytes. Integer arithmetic
    // (widened to long so the multiplication cannot overflow) gives the same result as multiplying by 0.625.
    int byteLen = (int) ((end * 5L) / 8);
    byte[] data = new byte[byteLen];

    // extract data from text
    int di = 0;
    int r = 0;
    // The previous one or two characters which still have bits to contribute to the next byte.
    char c0 = '\0';
    char c1 = '\0';
    for (int ti = 0; ti < end; ti++) {
      char cn = text[ti];
      if ((cn > 127) || values[0][cn] == -1) {
        throw new IllegalArgumentException(
            "Input text has invalid character 0x"
                + Integer.toHexString(cn)
                + " at position " + ti + ":" + new String(text));
      }

      // 00000111 11222223 33334444 45555566 66677777
      switch (r) {
        case 0:
          // First character of a block: no byte is complete yet.
          c0 = cn;
          break;
        case 1:
          data[di] = (byte) (values[0][c0] | values[1][cn]);
          di++;
          c0 = cn;
          break;
        case 2:
        case 5:
          // These characters sit wholly inside one byte, which is completed by the next character.
          c1 = cn;
          break;
        case 3:
          data[di] = (byte) (values[2][c0] | values[3][c1] | values[4][cn]);
          c0 = cn;
          di++;
          break;
        case 4:
          data[di] = (byte) (values[5][c0] | values[6][cn]);
          c0 = cn;
          di++;
          break;
        case 6:
          data[di] = (byte) (values[7][c0] | values[8][c1] | values[9][cn]);
          c0 = cn;
          di++;
          break;
        case 7:
          data[di] = (byte) (values[10][c0] | values[11][cn]);
          di++;
          break;
        default:
          // Required by coding rules. There is literally no way to reach this line but a 'default' case was
          // required.
          throw new AssertionError("Remainder modulo-8 was not between 0 and 7 inclusive.");
      }
      r = (r + 1) & 0x7;
    }

    // Verify final bits are zero
    if (values[OVERFLOWS[rem]][text[end - 1]] != 0) {
      throw new IllegalArgumentException(
          "Trailing bits detected in encoding \"..." + new String(text, end - rem, rem) + "\".");
    }
    return data;
  }


  /**
   * Write the leading characters of the encoding of a single five-byte block.
   *
   * @param b0     first byte of the block
   * @param b1     second byte of the block (zero if absent)
   * @param b2     third byte of the block (zero if absent)
   * @param b3     fourth byte of the block (zero if absent)
   * @param b4     fifth byte of the block (zero if absent)
   * @param output where to write the characters
   * @param offset index in {@code output} of the first character to write
   * @param count  how many of the block's eight characters to write
   */
  private void encodeBlock(byte b0, byte b1, byte b2, byte b3, byte b4, char[] output, int offset, int count) {
    // Assemble the block as a 40-bit number, then peel off 5 bits at a time starting from the top.
    long bits = ((b0 & 0xFFL) << 32)
        | ((b1 & 0xFFL) << 24)
        | ((b2 & 0xFFL) << 16)
        | ((b3 & 0xFFL) << 8)
        | (b4 & 0xFFL);
    for (int c = 0; c < count; c++) {
      output[offset + c] = chars[(int) ((bits >>> (35 - 5 * c)) & 0b11111)];
    }
  }


  /**
   * Encode binary data as Base 32 text. Padding is appended to a final partial block only if this converter was
   * created with padding required.
   *
   * @param bytes the data to encode
   *
   * @return the encoded text, or null if the input was null
   */
  @Nullable
  @Override
  public char[] encodeChars(byte[] bytes) {
    if (bytes == null) {
      return null;
    }

    // every five bytes requires 8 characters of output
    int fullBlocks = bytes.length / 5;
    int extraBytes = bytes.length - 5 * fullBlocks;
    int tailLength = TAIL_LENGTHS[extraBytes];

    int textLen = 8 * fullBlocks;
    if (extraBytes != 0) {
      textLen += mustPad ? 8 : tailLength;
    }
    char[] output = new char[textLen];

    int j = 0;
    int k = 0;
    for (int i = 0; i < fullBlocks; i++) {
      encodeBlock(bytes[j], bytes[j + 1], bytes[j + 2], bytes[j + 3], bytes[j + 4], output, k, 8);
      j += 5;
      k += 8;
    }

    if (extraBytes != 0) {
      // Missing bytes are treated as zero, so the last character written carries zeros in its unused low bits.
      byte b0 = bytes[j];
      byte b1 = (extraBytes > 1) ? bytes[j + 1] : 0;
      byte b2 = (extraBytes > 2) ? bytes[j + 2] : 0;
      byte b3 = (extraBytes > 3) ? bytes[j + 3] : 0;
      encodeBlock(b0, b1, b2, b3, (byte) 0, output, k, tailLength);
      if (mustPad) {
        Arrays.fill(output, k + tailLength, k + 8, pad);
      }
    }
    return output;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy