// org.seppiko.commons.utils.codec.Base32 -- from the commons-utils artifact ("A toolkit package"),
// available for Maven / Gradle / Ivy.
// Note: there is a newer version of this artifact: 2.11.0.
/*
 * Copyright 2023 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.seppiko.commons.utils.codec;

import java.io.Serial;
import java.io.Serializable;
import java.util.Objects;
import org.seppiko.commons.utils.CharUtil;

/**
 * Base32 encoding and decoding.
 *
 * <p>Three ready-made codecs are exposed as constants: {@link #RFC4648}, {@link #RFC4648_HEX} and
 * {@link #ZBASE32}. Encoding always pads the output to a multiple of 8 characters. Decoding accepts
 * upper- and lower-case letters and silently skips characters that are not part of the selected
 * alphabet.
 *
 * <p>NOTE(review): the bit arithmetic relies on constants inherited from {@link BaseNCodec}
 * ({@code BYTES_PER_ENCODED_BLOCK}, {@code BITS_PER_ENCODED_BYTE}, {@code BIT_WIDTH},
 * {@code MASK_8BITS}, {@code MASK_5BITS}); it presumes they are 8, 5, 5, 0xFF and 0x1F
 * respectively -- confirm against the interface.
 *
 * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC4648 §6</a>
 * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC4648 §7</a>
 * @see <a href="https://en.wikipedia.org/wiki/Base32">Base32</a>
 * @author Leonard Woo
 */
public class Base32 implements BaseNCodec, Serializable {

  @Serial
  private static final long serialVersionUID = -5230113688025284966L;

  /**
   * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet"
   * equivalents as specified in Table 3 of RFC 4648.
   */
  private static final byte[] RFC4648_ENCODE_TABLE = {
      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
      '2', '3', '4', '5', '6', '7',
  };

  /**
   * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified
   * in Table 3 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the Base32
   * alphabet but fall within the bounds of the array are translated to -1. Lower-case letters map to the same
   * values as their upper-case counterparts.
   */
  private static final byte[] RFC4648_DECODE_TABLE = {
   //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
      -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
      -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
                                                  -1, -1, -1, -1, -1, // 5b-5f
      -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
  };

  /**
   * This array is a lookup table that translates 5-bit positive integer index values into their
   * "Base32 Hex Alphabet" equivalents as specified in Table 4 of RFC 4648.
   */
  private static final byte[] RFC4648_HEX_ENCODE_TABLE = {
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
  };

  /**
   * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as
   * specified in Table 4 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the
   * Base32 Hex alphabet but fall within the bounds of the array are translated to -1. Lower-case letters map to
   * the same values as their upper-case counterparts.
   */
  private static final byte[] RFC4648_HEX_DECODE_TABLE = {
   //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
      -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
      25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
                                  -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
      -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
      25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
  };

  /**
   * This array is a lookup table that translates 5-bit positive integer index values into their
   * z-base-32 equivalents. z-base-32 is a Base32 encoding designed by Zooko Wilcox-O'Hearn to be
   * easier for human use. Its alphabet includes 1, 8 and 9 but excludes l, v, 0 and 2, and it is
   * permuted so that the easier characters are the ones that occur more frequently.
   */
  private static final byte[] ZBASE32_ENCODE_TABLE = {
      'y', 'b', 'n', 'd', 'r', 'f', 'g', '8', 'e', 'j', 'k', 'm', 'c',
      'p', 'q', 'x', 'o', 't', '1', 'u', 'w', 'i', 's', 'z', 'a', '3',
      '4', '5', 'h', '7', '6', '9',
  };

  /**
   * This array is a lookup table that translates Unicode characters drawn from the "Z-Base32 Alphabet"
   * into their 5-bit positive integer equivalents. Characters that are not in the Z-Base32 alphabet
   * but fall within the bounds of the array are translated to -1.
   *
   * <p>Every entry is the exact inverse of {@link #ZBASE32_ENCODE_TABLE}: the value stored for a
   * character is that character's index in the encode table. Upper-case letters are accepted and map
   * to the same values as the lower-case alphabet characters; 'l'/'L' and 'v'/'V' are not part of
   * the alphabet.
   */
  private static final byte[] ZBASE32_DECODE_TABLE = {
   //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
      -1, 18, -1, 25, 26, 27, 30, 29,  7, 31, -1, -1, -1, -1, -1, -1, // 30-3f 1-9
      -1, 24,  1, 12,  3,  8,  5,  6, 28, 21,  9, 10, -1, 11,  2, 16, // 40-4f A-O
      13, 14,  4, 22, 17, 19, -1, 20, 15,  0, 23,                     // 50-5a P-Z
                                                  -1, -1, -1, -1, -1, // 5b-5f
      -1, 24,  1, 12,  3,  8,  5,  6, 28, 21,  9, 10, -1, 11,  2, 16, // 60-6f a-o
      13, 14,  4, 22, 17, 19, -1, 20, 15,  0, 23,                     // 70-7a p-z
  };

  /**
   * The most widely used Base32 alphabet is defined in RFC 4648. It uses an alphabet of A–Z,
   * followed by 2–7. The digits 0, 1 and 8 are skipped due to their similarity with the letters O,
   * I and B (thus "2" has a decimal value of 26).
   */
  public static final Base32 RFC4648 = new Base32(RFC4648_ENCODE_TABLE, RFC4648_DECODE_TABLE);

  /**
   * The "Extended Hex" Base32 alphabet (base32hex) defined in RFC 4648 §7. It uses the digits 0–9,
   * followed by the letters A–V.
   */
  public static final Base32 RFC4648_HEX = new Base32(RFC4648_HEX_ENCODE_TABLE, RFC4648_HEX_DECODE_TABLE);

  /**
   * z-base-32 is a Base32 encoding designed by Zooko Wilcox-O'Hearn to be easier for human use
   * and more compact. It includes 1, 8 and 9 but excludes l, v, 0 and 2.
   * It also permutes the alphabet so that the easier characters are the ones that occur more
   * frequently.
   *
   * @see <a href="https://philzimmermann.com/docs/human-oriented-base-32-encoding.txt">human-oriented base-32 encoding</a>
   */
  public static final Base32 ZBASE32 = new Base32(ZBASE32_ENCODE_TABLE, ZBASE32_DECODE_TABLE);

  /** base32 encode table: 5-bit value -> alphabet character */
  private final byte[] encodeTable;
  /** base32 decode table: character code -> 5-bit value, or -1 when not in the alphabet */
  private final byte[] decodeTable;
  /** base32 padding char */
  private final char padding;

  /**
   * Creates a codec for one alphabet.
   *
   * @param encodeTable the 32 alphabet characters, indexed by 5-bit value.
   * @param decodeTable the inverse lookup, indexed by character code.
   * @throws IllegalArgumentException if {@code encodeTable} does not hold exactly 32 entries.
   */
  private Base32(byte[] encodeTable, byte[] decodeTable) {
    if (encodeTable.length != 32) {
      throw new IllegalArgumentException("base32 alphabet must have 32 chars.");
    }
    this.encodeTable = encodeTable;
    this.decodeTable = decodeTable;
    // presumably '=' -- the constant is declared in CharUtil, outside this file
    this.padding = CharUtil.EQUALS_SIGN;
  }

  /**
   * Base32-encode the given data and return a newly allocated String with the result.
   *
   * <p>The output is padded with the padding character until its length is a multiple of the
   * encoded block size. An empty input produces an empty string.
   *
   * @param source the data to encode.
   * @return a newly allocated String with the result.
   * @throws NullPointerException data is {@code null}.
   */
  @Override
  public String encodeString(final byte[] source) {
    Objects.requireNonNull(source, "source must be not null.");

    // Capacity hint only. Computed in long so that a very large input cannot overflow int and
    // produce a negative StringBuilder capacity.
    final long capacityHint =
        (source.length + 7L) * BYTES_PER_ENCODED_BLOCK / BITS_PER_ENCODED_BYTE;
    final StringBuilder result =
        new StringBuilder((int) Math.min(capacityHint, Integer.MAX_VALUE - 8));

    // idx is the bit offset, inside source[i], at which the next 5-bit group starts.
    int idx = 0;
    int digit;

    for (int i = 0; i < source.length;) {
      final int currByte = source[i] & 0xFF;
      if (idx > 3) {
        // The group straddles two bytes: take the low bits of the current byte and the high bits
        // of the next one (zero bits when the input is exhausted).
        final int nextByte = ((i + 1) < source.length) ? (source[i + 1] & 0xFF) : 0;

        digit = (int) (currByte & (MASK_8BITS >> idx));
        idx = (idx + BIT_WIDTH) % BYTES_PER_ENCODED_BLOCK;
        digit <<= idx;
        digit |= (nextByte >> (BYTES_PER_ENCODED_BLOCK - idx));
        i++;
      } else {
        // The group lies entirely inside the current byte.
        digit = (int) ((currByte >> (BYTES_PER_ENCODED_BLOCK - (idx + BIT_WIDTH))) & MASK_5BITS);
        idx = (idx + BIT_WIDTH) % BYTES_PER_ENCODED_BLOCK;

        // The group ended exactly on the byte boundary: move on to the next byte.
        if (idx == 0) {
          i++;
        }
      }

      result.append((char) encodeTable[digit]);
    }

    while ((result.length() % BYTES_PER_ENCODED_BLOCK) != 0) {
      result.append(padding);
    }

    return result.toString();
  }

  /**
   * Decode the Base32-encoded data in input and return the data in a new byte array.
   *
   * <p>Characters that are not part of this codec's alphabet (including the padding character) are
   * skipped. When the input ends with padding, only the characters before the first padding
   * character determine the output length. Input that carries fewer characters than are needed for
   * a single byte (for example an empty string or {@code "A======="}) decodes to an empty array.
   *
   * @param str the data to decode.
   * @return the data in a new byte array.
   * @throws IllegalArgumentException data is not divisible by 8.
   * @throws NullPointerException data is {@code null}.
   */
  @Override
  public byte[] decode(final String str) throws IllegalArgumentException, NullPointerException {
    Objects.requireNonNull(str, "input string must be not null.");
    if ((str.length() % BYTES_PER_ENCODED_BLOCK) != 0) {
      throw new IllegalArgumentException("input string length must be divisible by 8.");
    }

    final int dataChars = str.endsWith(String.valueOf(padding))
        ? str.indexOf(padding)
        : str.length();
    final byte[] result = new byte[dataChars * BITS_PER_ENCODED_BYTE / BYTES_PER_ENCODED_BLOCK];

    // Nothing to fill. Without this guard a lone alphabet character in front of the padding would
    // be written into the zero-length array and raise ArrayIndexOutOfBoundsException.
    if (result.length == 0) {
      return result;
    }

    // idx is the bit offset, inside result[offset], at which the next 5-bit group is stored.
    for (int i = 0, idx = 0, offset = 0; i < str.length(); i++) {
      final int lookup = str.charAt(i);

      /* Skip chars outside the lookup table */
      if (lookup >= decodeTable.length) {
        continue;
      }

      final int digit = decodeTable[lookup];

      /* If this digit is not in the table, ignore it */
      if (digit < 0) {
        continue;
      }

      if (idx <= 3) {
        // The group fits entirely inside the current output byte.
        idx = (idx + BIT_WIDTH) % BYTES_PER_ENCODED_BLOCK;
        if (idx == 0) {
          // The group completes the byte exactly.
          result[offset] |= digit;
          offset++;
          if (offset >= result.length) {
            break;
          }
        } else {
          result[offset] |= digit << (BYTES_PER_ENCODED_BLOCK - idx);
        }
      } else {
        // The group straddles two output bytes: high bits finish the current byte, low bits start
        // the next one.
        idx = (idx + BIT_WIDTH) % BYTES_PER_ENCODED_BLOCK;
        result[offset] |= (digit >>> idx);
        offset++;

        if (offset >= result.length) {
          break;
        }
        result[offset] |= digit << (BYTES_PER_ENCODED_BLOCK - idx);
      }
    }
    return result;
  }
}