com.pippsford.encoding.GenericAscii85 Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of common-utils Show documentation
Code that seems to me to have utility across multiple projects
The newest version!
package com.pippsford.encoding;

import javax.annotation.Nullable;
import javax.annotation.concurrent.ThreadSafe;

/**
 * Convert data into a Ascii85 encoded form where every four bytes of data is encoded as five characters.
 *
 * See Wikipedia description of Ascii85 encoding
 *
 * 
Note Ascii85 uses many characters which are considered special in some systems. For maximum portability, use
 * Base64 instead.
 *
 * @author Simon Greatrix
 */
// Suppressing "cognitive complexity" warnings as data validation requires a lot of if-then constructs
// which leads to a high cognitive complexity. Sometimes there are just a lot of rules to check.
@SuppressWarnings("squid:S3776")
@ThreadSafe
public class GenericAscii85 implements Converter {

  /**
   * The encoding can be quoted between "<~" and "~>".
   */
  final boolean useQuotes;

  /**
   * A 'y' can represent 4 consecutive spaces (0x20).
   */
  final boolean useY;

  /**
   * A 'z' can represent 4 consecutive zero bytes.
   */
  final boolean useZ;


  /**
   * Create an Ascii 85 converter.
   *
   * @param useZ      does 'z' represent four consecutive zeros?
   * @param useY      does 'y' represent four consecutive spaces (0x20)?
   * @param useQuotes is the canonical form quoted between "<~" and "~>"?
   */
  public GenericAscii85(boolean useZ, boolean useY, boolean useQuotes) {
    this.useZ = useZ;
    this.useY = useY;
    this.useQuotes = useQuotes;
  }


  /**
   * {@inheritDoc}
   */
  @SuppressWarnings("squid:S135")
  @Nullable
  @Override
  public char[] clean(char[] text) {
    if (text == null) {
      return null;
    }
    int start = 0;
    int end = text.length;
    int p = indexOf(text, 0, '<', '~');
    if (p != -1) {
      start = p + 2;
    }
    p = indexOf(text, start, '~', '>');
    if (p != -1) {
      end = p;
    }

    char[] buf = new char[end - start];
    int pos = 0;
    if (useQuotes) {
      buf = TextToByte.append(buf, pos++, '<');
      buf = TextToByte.append(buf, pos++, '~');
    }

    int j = 0;
    long v = 0;
    for (int i = start; i < end; i++) {
      char ch = text[i];
      if (j == 0) {
        if (ch == 'z' && useZ) {
          buf = TextToByte.append(buf, pos++, 'z');
          continue;
        }
        if (ch == 'y' && useY) {
          buf = TextToByte.append(buf, pos++, 'y');
          continue;
        }
      }

      if (ch < '!' || ch > 'u') {
        // bad character, skip
        continue;
      }

      buf = TextToByte.append(buf, pos++, ch);
      j++;
      v = v * 85 + (ch - '!');
      if (j == 5) {
        // remove invalid patterns
        if (v >= 0x100000000L) {
          pos -= 5;
        }
        if (v == 0 && useZ) {
          pos -= 5;
          buf = TextToByte.append(buf, pos++, 'z');
        }
        if (v == 0x20202020 && useY) {
          pos -= 5;
          buf = TextToByte.append(buf, pos++, 'y');
        }
        j = 0;
        v = 0;
      }
    }

    if (j == 1) {
      // trailing character, remove it
      pos--;
    } else if (j > 1) {
      // Re-encode final bytes to ensure correct format
      for (int i = j; i <= 4; i++) {
        v *= 85;
      }
      for (int i = j; i <= 4; i++) {
        v |= 0xff << (8 * (i - j));
      }

      if (v >= 0x100000000L) {
        // final bytes would have been invalid, so remove
        pos -= j;
      } else {
        // re-encode final bytes
        for (int i = j; i <= 4; i++) {
          v /= 85;
        }
        for (int i = 1; i <= j; i++) {
          buf[pos - i] = (char) ('!' + v % 85);
          v /= 85;
        }
      }
    }

    if (useQuotes) {
      buf = TextToByte.append(buf, pos++, '~');
      buf = TextToByte.append(buf, pos++, '>');
    }

    return TextToByte.trim(buf, pos);
  }


  /**
   * Decode the provided textual representation back into binary data.
   *
   * 
The footer of ~> may be included in the input. Any characters after the first footer will be ignored.
   *
   * The header of <~ may be included in the input. Any characters before the header will be ignored.
   *
   * @param text textual representation
   *
   * @return binary data
   */
  @Override
  @Nullable
  public byte[] decode(char[] text) {
    if (text == null) {
      return null;
    }

    // strip out all whitespace
    int start = 0;
    int end = TextToByte.removeWhitespaceInPlace(text);

    int p = indexOf(text, 0, '<', '~');
    if (p != -1) {
      start = p + 2;
    }
    p = indexOf(text, start, '~', '>');
    if (p != -1) {
      end = p;
    }

    // get data length
    long block = 0;
    int len = 0;
    int j = 0;
    for (int i = start; i < end; i++) {
      char ch = text[i];
      if (j == 0) {
        if ((useZ && ch == 'z') || (useY && ch == 'y')) {
          // z is 4 zeroes
          // y is 4 spaces
          len += 4;
        } else {
          if ((ch < '!' || ch > 'u')) {
            throw new IllegalArgumentException(
                "Invalid ASCII85. Bad character 0x"
                    + Integer.toHexString(ch)
                    + " in input");
          }
          j++;
          block = (long) ch - '!';
        }
      } else {
        if ((ch < '!' || ch > 'u')) {
          throw new IllegalArgumentException(
              "Invalid ASCII85. Bad character 0x"
                  + Integer.toHexString(ch) + " in input");
        }
        len++;
        j++;
        block = (block * 85) + (ch - '!');
        if (block >= 0x100000000L) {
          throw new IllegalArgumentException(
              "Invalid ASCII85. Bad 5 character sequence \"" + new String(text, i - 4, 5) + "\" in input");
        }
        if (j == 5) {
          j = 0;
        }
      }
    }

    if (j == 1) {
      throw new IllegalArgumentException(
          "Invalid ASCII85. Only one character in final 5-character block of: "
              + new String(text));
    }

    byte[] data = new byte[len];
    j = 0;
    len = 0;
    block = 0;
    for (int i = start; i < end; i++) {
      char ch = text[i];
      if (j == 0) {
        if (ch == 'z') {
          // z is 4 zeroes
          data[len] = 0;
          data[len + 1] = 0;
          data[len + 2] = 0;
          data[len + 3] = 0;
          len += 4;
        } else if (ch == 'y') {
          // y is 4 spaces
          data[len] = 0x20;
          data[len + 1] = 0x20;
          data[len + 2] = 0x20;
          data[len + 3] = 0x20;
          len += 4;
        } else {
          block = (long) ch - '!';
          j++;
        }
      } else {
        block = (block * 85) + (ch - '!');
        j++;
        if (j == 5) {
          j = 0;
          data[len] = (byte) ((block & 0xff000000) >> 24);
          len++;
          data[len] = (byte) ((block & 0x00ff0000) >> 16);
          len++;
          data[len] = (byte) ((block & 0x0000ff00) >> 8);
          len++;
          data[len] = (byte) (block & 0x000000ff);
          len++;
          block = 0;
        }
      }
    }

    switch (j) {
      case 2:
        block = block * 85 * 85 * 85;
        data[len] = (byte) ((block & 0xff000000) >> 24);
        break;
      case 3:
        block = block * 85 * 85;
        data[len] = (byte) ((block & 0xff000000) >> 24);
        len++;
        data[len] = (byte) ((block & 0x00ff0000) >> 16);
        break;
      case 4:
        block = block * 85;
        data[len] = (byte) ((block & 0xff000000) >> 24);
        len++;
        data[len] = (byte) ((block & 0x00ff0000) >> 16);
        len++;
        data[len] = (byte) ((block & 0x0000ff00) >> 8);
        break;
      default:
        break;
    }
    return data;
  }


  /**
   * Encode the provided binary data in a textual form.
   *
   * @param bytes binary data
   *
   * @return textual representation
   */
  @Nullable
  @Override
  public char[] encodeChars(byte[] bytes) {
    if (bytes == null) {
      return null;
    }
    // every 4 bytes requires 5 characters of output
    int fullBlocks = bytes.length / 4;
    int extraBytes = bytes.length - 4 * fullBlocks;
    int extraChars;
    switch (extraBytes) {
      case 0:
        extraChars = 0;
        break;
      case 1:
        extraChars = 3;
        break; // need one spare
      default:
        extraChars = extraBytes + 1;
        break;
    }

    // output size is 5 chars per full block, some extra chars for the
    // last block, and 4 chars for header and footer
    char[] output = new char[5 * fullBlocks + extraChars + (useQuotes ? 4 : 0)];

    // set header
    int j = 0;
    if (useQuotes) {
      output[0] = '<';
      output[1] = '~';
      j = 2;
    }
    int k = 0;
    for (int i = 0; i < fullBlocks; i++) {
      j += getBlock(output, j, bytes[k], bytes[k + 1], bytes[k + 2],
          bytes[k + 3]
      );
      k += 4;
    }

    // the final block consists of one more character than there are
    // bytes, without the special 'z' for all zero.
    switch (extraBytes) {
      case 1:
        getFinalBlock(output, j, 1, bytes[k], (byte) 0xff, (byte) 0xff);
        j += 2;
        break;
      case 2:
        getFinalBlock(output, j, 2, bytes[k], bytes[k + 1], (byte) 0xff);
        j += 3;
        break;
      case 3:
        getFinalBlock(output, j, 3, bytes[k], bytes[k + 1], bytes[k + 2]);
        j += 4;
        break;
      default:
        break;
    }

    // output footer
    if (useQuotes) {
      output[j] = '~';
      output[j + 1] = '>';
      j += 2;
    }

    return TextToByte.trim(output, j);
  }


  private int getBlock(
      char[] output, int offset, byte b0, byte b1,
      byte b2, byte b3
  ) {
    long v = (((long) (0xff & b0)) << 24) | ((0xff & b1) << 16)
        | ((0xff & b2) << 8) | (0xff & b3);

    if (v == 0 && useZ) {
      output[offset] = 'z';
      return 1;
    }

    if (v == 0x20202020 && useY) {
      output[offset] = 'y';
      return 1;
    }

    for (int i = 4; i >= 0; i--) {
      output[offset + i] = (char) ('!' + v % 85);
      v /= 85;
    }
    return 5;
  }


  private void getFinalBlock(
      char[] output, int offset, int count, byte b0, byte b1,
      byte b2
  ) {
    long v = (((long) (0xff & b0)) << 24) | ((0xff & b1) << 16)
        | ((0xff & b2) << 8) | 0xff;

    for (int i = 4; i > count; i--) {
      v /= 85;
    }
    for (int i = count; i >= 0; i--) {
      output[offset + i] = (char) ('!' + v % 85);
      v /= 85;
    }
  }


  /**
   * Find two adjacent characters in the text.
   *
   * @param text  the text
   * @param start where to start
   * @param c1    first character
   * @param c2    second character
   *
   * @return location, or -1
   */
  private int indexOf(char[] text, int start, char c1, char c2) {
    int len = text.length - 1;
    for (int i = start; i < len; i++) {
      if (text[i] == c1 && text[i + 1] == c2) {
        return i;
      }
    }
    return -1;
  }

}