/*
 * com.google.gwt.thirdparty.guava.common.io.BaseEncoding (Maven / Gradle / Ivy)
 *
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.gwt.thirdparty.guava.common.io;
import static com.google.gwt.thirdparty.guava.common.base.Preconditions.checkArgument;
import static com.google.gwt.thirdparty.guava.common.base.Preconditions.checkNotNull;
import static com.google.gwt.thirdparty.guava.common.base.Preconditions.checkPositionIndexes;
import static com.google.gwt.thirdparty.guava.common.base.Preconditions.checkState;
import static com.google.gwt.thirdparty.guava.common.io.GwtWorkarounds.asCharInput;
import static com.google.gwt.thirdparty.guava.common.io.GwtWorkarounds.asCharOutput;
import static com.google.gwt.thirdparty.guava.common.io.GwtWorkarounds.asInputStream;
import static com.google.gwt.thirdparty.guava.common.io.GwtWorkarounds.asOutputStream;
import static com.google.gwt.thirdparty.guava.common.io.GwtWorkarounds.stringBuilderOutput;
import static com.google.gwt.thirdparty.guava.common.math.IntMath.divide;
import static com.google.gwt.thirdparty.guava.common.math.IntMath.log2;
import static java.math.RoundingMode.CEILING;
import static java.math.RoundingMode.FLOOR;
import static java.math.RoundingMode.UNNECESSARY;
import com.google.gwt.thirdparty.guava.common.annotations.Beta;
import com.google.gwt.thirdparty.guava.common.annotations.GwtCompatible;
import com.google.gwt.thirdparty.guava.common.annotations.GwtIncompatible;
import com.google.gwt.thirdparty.guava.common.base.Ascii;
import com.google.gwt.thirdparty.guava.common.base.CharMatcher;
import com.google.gwt.thirdparty.guava.common.io.GwtWorkarounds.ByteInput;
import com.google.gwt.thirdparty.guava.common.io.GwtWorkarounds.ByteOutput;
import com.google.gwt.thirdparty.guava.common.io.GwtWorkarounds.CharInput;
import com.google.gwt.thirdparty.guava.common.io.GwtWorkarounds.CharOutput;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.util.Arrays;
import javax.annotation.CheckReturnValue;
import javax.annotation.Nullable;
/**
 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
 * strings. This class includes several constants for encoding schemes specified by <a
 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
 *
 * <pre>   {@code
 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
 *
 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
 *   byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
 *
 * <p>...returns the ASCII bytes of the string {@code "foo"}.
 *
 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
 * RFC 4648. Decoding rejects characters in the wrong case, though padding is optional.
 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
 * with modified behavior:
 *
 * <pre>   {@code
 *   BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
 *
 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
 *
 * <pre>   {@code
 *   // Do NOT do this
 *   BaseEncoding hex = BaseEncoding.base16();
 *   hex.lowerCase(); // does nothing!
 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
 *
 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
 * {@code x}, but the reverse does not necessarily hold.
 *
 * <p>
 * <table>
 * <tr>
 * <th>Encoding
 * <th>Alphabet
 * <th>{@code char:byte} ratio
 * <th>Default padding
 * <th>Comments
 * <tr>
 * <td>{@link #base16()}
 * <td>0-9 A-F
 * <td>2.00
 * <td>N/A
 * <td>Traditional hexadecimal. Defaults to upper case.
 * <tr>
 * <td>{@link #base32()}
 * <td>A-Z 2-7
 * <td>1.60
 * <td>=
 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
 * <tr>
 * <td>{@link #base32Hex()}
 * <td>0-9 A-V
 * <td>1.60
 * <td>=
 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
 * <tr>
 * <td>{@link #base64()}
 * <td>A-Z a-z 0-9 + /
 * <td>1.33
 * <td>=
 * <td>
 * <tr>
 * <td>{@link #base64Url()}
 * <td>A-Z a-z 0-9 - _
 * <td>1.33
 * <td>=
 * <td>Safe to use as filenames, or to pass in URLs without escaping
 * </table>
 *
 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
 *
 * @author Louis Wasserman
 * @since 14.0
 */
@GwtCompatible(emulated = true)
public abstract class BaseEncoding {
// TODO(user): consider adding encodeTo(Appendable, byte[], [int, int])
BaseEncoding() {}
* Exception indicating invalid base-encoded input encountered while decoding.
* @author Louis Wasserman
* @since 15.0
public static final class DecodingException extends IOException {
DecodingException(String message) {
DecodingException(Throwable cause) {
* Encodes the specified byte array, and returns the encoded {@code String}.
public String encode(byte[] bytes) {
return encode(checkNotNull(bytes), 0, bytes.length);
* Encodes the specified range of the specified byte array, and returns the encoded
* {@code String}.
public final String encode(byte[] bytes, int off, int len) {
checkPositionIndexes(off, off + len, bytes.length);
CharOutput result = stringBuilderOutput(maxEncodedSize(len));
ByteOutput byteOutput = encodingStream(result);
try {
for (int i = 0; i < len; i++) {
byteOutput.write(bytes[off + i]);
} catch (IOException impossible) {
throw new AssertionError("impossible");
return result.toString();
* Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
* {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing
* {@code Writer}.
public final OutputStream encodingStream(Writer writer) {
return asOutputStream(encodingStream(asCharOutput(writer)));
* Returns an {@code OutputSupplier} that supplies streams that encode bytes using this encoding
* into writers from the specified {@code OutputSupplier}.
* @deprecated Use {@link #encodingSink(CharSink)} instead. This method is scheduled to be
* removed in Guava 16.0.
public final OutputSupplier encodingStream(
final OutputSupplier extends Writer> writerSupplier) {
return new OutputSupplier() {
public OutputStream getOutput() throws IOException {
return encodingStream(writerSupplier.getOutput());
* Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
public final ByteSink encodingSink(final CharSink encodedSink) {
return new ByteSink() {
public OutputStream openStream() throws IOException {
return encodingStream(encodedSink.openStream());
// TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher)
private static byte[] extract(byte[] result, int length) {
if (length == result.length) {
return result;
} else {
byte[] trunc = new byte[length];
System.arraycopy(result, 0, trunc, 0, length);
return trunc;
* Decodes the specified character sequence, and returns the resulting {@code byte[]}.
* This is the inverse operation to {@link #encode(byte[])}.
* @throws IllegalArgumentException if the input is not a valid encoded string according to this
* encoding.
public final byte[] decode(CharSequence chars) {
try {
return decodeChecked(chars);
} catch (DecodingException badInput) {
throw new IllegalArgumentException(badInput);
* Decodes the specified character sequence, and returns the resulting {@code byte[]}.
* This is the inverse operation to {@link #encode(byte[])}.
* @throws DecodingException if the input is not a valid encoded string according to this
* encoding.
final byte[] decodeChecked(CharSequence chars) throws DecodingException {
chars = padding().trimTrailingFrom(chars);
ByteInput decodedInput = decodingStream(asCharInput(chars));
byte[] tmp = new byte[maxDecodedSize(chars.length())];
int index = 0;
try {
for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
tmp[index++] = (byte) i;
} catch (DecodingException badInput) {
throw badInput;
} catch (IOException impossible) {
throw new AssertionError(impossible);
return extract(tmp, index);
* Returns an {@code InputStream} that decodes base-encoded input from the specified
* {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific
* errors.
public final InputStream decodingStream(Reader reader) {
return asInputStream(decodingStream(asCharInput(reader)));
* Returns an {@code InputSupplier} that supplies input streams that decode base-encoded input
* from readers from the specified supplier.
* @deprecated Use {@link #decodingSource(CharSource)} instead. This method is scheduled to be
* removed in Guava 16.0.
public final InputSupplier decodingStream(
final InputSupplier extends Reader> readerSupplier) {
return new InputSupplier() {
public InputStream getInput() throws IOException {
return decodingStream(readerSupplier.getInput());
* Returns a {@code ByteSource} that reads base-encoded bytes from the specified
* {@code CharSource}.
public final ByteSource decodingSource(final CharSource encodedSource) {
return new ByteSource() {
public InputStream openStream() throws IOException {
return decodingStream(encodedSource.openStream());
// Implementations for encoding/decoding
// Size, in characters, used to pre-size the buffer that receives the encoding of `bytes` bytes.
abstract int maxEncodedSize(int bytes);
// Wraps `charOutput` in a byte sink; bytes written to the result are emitted as encoded chars.
abstract ByteOutput encodingStream(CharOutput charOutput);
// Size, in bytes, of the scratch array allocated when decoding `chars` characters; it must be
// large enough for every byte the decoder can produce from that many characters.
abstract int maxDecodedSize(int chars);
// Wraps `charInput` in a byte source that yields the decoded bytes.
abstract ByteInput decodingStream(CharInput charInput);
// Matcher for this encoding's padding characters; used to trim trailing padding before decoding.
abstract CharMatcher padding();
// Modified encoding generators
* Returns an encoding that behaves equivalently to this encoding, but omits any padding
* characters as specified by RFC 4648
* section 3.2, Padding of Encoded Data.
public abstract BaseEncoding omitPadding();
* Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
* for padding.
* @throws IllegalArgumentException if this padding character is already used in the alphabet or a
* separator
public abstract BaseEncoding withPadChar(char padChar);
* Returns an encoding that behaves equivalently to this encoding, but adds a separator string
* after every {@code n} characters. Any occurrences of any characters that occur in the separator
* are skipped over in decoding.
* @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
* string, or if {@code n <= 0}
* @throws UnsupportedOperationException if this encoding already uses a separator
public abstract BaseEncoding withSeparator(String separator, int n);
* Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
* uppercase letters. Padding and separator characters remain in their original case.
* @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
* lower-case characters
public abstract BaseEncoding upperCase();
* Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
* lowercase letters. Padding and separator characters remain in their original case.
* @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
* lower-case characters
public abstract BaseEncoding lowerCase();
private static final BaseEncoding BASE64 = new StandardBaseEncoding(
"base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
* The "base64" base encoding specified by RFC 4648 section 4, Base 64 Encoding.
* (This is the same as the base 64 encoding from RFC 3548.)
* The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
* omitted} or {@linkplain #withPadChar(char) replaced}.
No line feeds are added by default, as per RFC 4648 section 3.1, Line Feeds in
* Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
public static BaseEncoding base64() {
return BASE64;
private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
"base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');
* The "base64url" encoding specified by RFC 4648 section 5, Base 64 Encoding
* with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
* (This is the same as the base 64 encoding with URL and filename safe alphabet from RFC 3548.)
The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
* omitted} or {@linkplain #withPadChar(char) replaced}.
No line feeds are added by default, as per RFC 4648 section 3.1, Line Feeds in
* Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
public static BaseEncoding base64Url() {
return BASE64_URL;
private static final BaseEncoding BASE32 =
new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
* The "base32" encoding specified by RFC 4648 section 6, Base 32 Encoding.
* (This is the same as the base 32 encoding from RFC 3548.)
The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
* omitted} or {@linkplain #withPadChar(char) replaced}.
No line feeds are added by default, as per RFC 4648 section 3.1, Line Feeds in
* Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
public static BaseEncoding base32() {
return BASE32;
private static final BaseEncoding BASE32_HEX =
new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');
* The "base32hex" encoding specified by RFC 4648 section 7, Base 32 Encoding
* with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
* omitted} or {@linkplain #withPadChar(char) replaced}.
No line feeds are added by default, as per RFC 4648 section 3.1, Line Feeds in
* Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
public static BaseEncoding base32Hex() {
return BASE32_HEX;
private static final BaseEncoding BASE16 =
new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);
* The "base16" encoding specified by RFC 4648 section 8, Base 16 Encoding.
* (This is the same as the base 16 encoding from RFC 3548.) This is commonly known as
* "hexadecimal" format.
No padding is necessary in base 16, so {@link #withPadChar(char)} and
* {@link #omitPadding()} have no effect.
No line feeds are added by default, as per RFC 4648 section 3.1, Line Feeds in
* Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
public static BaseEncoding base16() {
return BASE16;
private static final class Alphabet extends CharMatcher {
private final String name;
// this is meant to be immutable -- don't modify it!
private final char[] chars;
final int mask;
final int bitsPerChar;
final int charsPerChunk;
final int bytesPerChunk;
private final byte[] decodabet;
private final boolean[] validPadding;
Alphabet(String name, char[] chars) {
this.name = checkNotNull(name);
this.chars = checkNotNull(chars);
try {
this.bitsPerChar = log2(chars.length, UNNECESSARY);
} catch (ArithmeticException e) {
throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
* e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
* for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
this.charsPerChunk = 8 / gcd;
this.bytesPerChunk = bitsPerChar / gcd;
this.mask = chars.length - 1;
byte[] decodabet = new byte[Ascii.MAX + 1];
Arrays.fill(decodabet, (byte) -1);
for (int i = 0; i < chars.length; i++) {
char c = chars[i];
checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
decodabet[c] = (byte) i;
this.decodabet = decodabet;
boolean[] validPadding = new boolean[charsPerChunk];
for (int i = 0; i < bytesPerChunk; i++) {
validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
this.validPadding = validPadding;
char encode(int bits) {
return chars[bits];
boolean isValidPaddingStartPosition(int index) {
return validPadding[index % charsPerChunk];
int decode(char ch) throws IOException {
if (ch > Ascii.MAX || decodabet[ch] == -1) {
throw new DecodingException("Unrecognized character: " + ch);
return decodabet[ch];
private boolean hasLowerCase() {
for (char c : chars) {
if (Ascii.isLowerCase(c)) {
return true;
return false;
private boolean hasUpperCase() {
for (char c : chars) {
if (Ascii.isUpperCase(c)) {
return true;
return false;
Alphabet upperCase() {
if (!hasLowerCase()) {
return this;
} else {
checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
char[] upperCased = new char[chars.length];
for (int i = 0; i < chars.length; i++) {
upperCased[i] = Ascii.toUpperCase(chars[i]);
return new Alphabet(name + ".upperCase()", upperCased);
Alphabet lowerCase() {
if (!hasUpperCase()) {
return this;
} else {
checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
char[] lowerCased = new char[chars.length];
for (int i = 0; i < chars.length; i++) {
lowerCased[i] = Ascii.toLowerCase(chars[i]);
return new Alphabet(name + ".lowerCase()", lowerCased);
public boolean matches(char c) {
return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
public String toString() {
return name;
static final class StandardBaseEncoding extends BaseEncoding {
// TODO(user): provide a useful toString
private final Alphabet alphabet;
private final Character paddingChar;
StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
this.alphabet = checkNotNull(alphabet);
checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
"Padding character %s was already in alphabet", paddingChar);
this.paddingChar = paddingChar;
CharMatcher padding() {
return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
int maxEncodedSize(int bytes) {
return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
ByteOutput encodingStream(final CharOutput out) {
return new ByteOutput() {
int bitBuffer = 0;
int bitBufferLength = 0;
int writtenChars = 0;
public void write(byte b) throws IOException {
bitBuffer <<= 8;
bitBuffer |= b & 0xFF;
bitBufferLength += 8;
while (bitBufferLength >= alphabet.bitsPerChar) {
int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
& alphabet.mask;
bitBufferLength -= alphabet.bitsPerChar;
public void flush() throws IOException {
public void close() throws IOException {
if (bitBufferLength > 0) {
int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
& alphabet.mask;
if (paddingChar != null) {
while (writtenChars % alphabet.charsPerChunk != 0) {
int maxDecodedSize(int chars) {
return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
ByteInput decodingStream(final CharInput reader) {
return new ByteInput() {
int bitBuffer = 0;
int bitBufferLength = 0;
int readChars = 0;
boolean hitPadding = false;
final CharMatcher paddingMatcher = padding();
public int read() throws IOException {
while (true) {
int readChar = reader.read();
if (readChar == -1) {
if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
throw new DecodingException("Invalid input length " + readChars);
return -1;
char ch = (char) readChar;
if (paddingMatcher.matches(ch)) {
if (!hitPadding
&& (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
throw new DecodingException("Padding cannot start at index " + readChars);
hitPadding = true;
} else if (hitPadding) {
throw new DecodingException(
"Expected padding character but found '" + ch + "' at index " + readChars);
} else {
bitBuffer <<= alphabet.bitsPerChar;
bitBuffer |= alphabet.decode(ch);
bitBufferLength += alphabet.bitsPerChar;
if (bitBufferLength >= 8) {
bitBufferLength -= 8;
return (bitBuffer >> bitBufferLength) & 0xFF;
public void close() throws IOException {
public BaseEncoding omitPadding() {
return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
public BaseEncoding withPadChar(char padChar) {
if (8 % alphabet.bitsPerChar == 0 ||
(paddingChar != null && paddingChar.charValue() == padChar)) {
return this;
} else {
return new StandardBaseEncoding(alphabet, padChar);
public BaseEncoding withSeparator(String separator, int afterEveryChars) {
"Separator cannot contain alphabet or padding characters");
return new SeparatedBaseEncoding(this, separator, afterEveryChars);
private transient BaseEncoding upperCase;
private transient BaseEncoding lowerCase;
public BaseEncoding upperCase() {
BaseEncoding result = upperCase;
if (result == null) {
Alphabet upper = alphabet.upperCase();
result = upperCase =
(upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
return result;
public BaseEncoding lowerCase() {
BaseEncoding result = lowerCase;
if (result == null) {
Alphabet lower = alphabet.lowerCase();
result = lowerCase =
(lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
return result;
public String toString() {
StringBuilder builder = new StringBuilder("BaseEncoding.");
if (8 % alphabet.bitsPerChar != 0) {
if (paddingChar == null) {
} else {
return builder.toString();
static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
return new CharInput() {
public int read() throws IOException {
int readChar;
do {
readChar = delegate.read();
} while (readChar != -1 && toIgnore.matches((char) readChar));
return readChar;
public void close() throws IOException {
static CharOutput separatingOutput(
final CharOutput delegate, final String separator, final int afterEveryChars) {
checkArgument(afterEveryChars > 0);
return new CharOutput() {
int charsUntilSeparator = afterEveryChars;
public void write(char c) throws IOException {
if (charsUntilSeparator == 0) {
for (int i = 0; i < separator.length(); i++) {
charsUntilSeparator = afterEveryChars;
public void flush() throws IOException {
public void close() throws IOException {
static final class SeparatedBaseEncoding extends BaseEncoding {
private final BaseEncoding delegate;
private final String separator;
private final int afterEveryChars;
private final CharMatcher separatorChars;
SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
this.delegate = checkNotNull(delegate);
this.separator = checkNotNull(separator);
this.afterEveryChars = afterEveryChars;
afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
this.separatorChars = CharMatcher.anyOf(separator).precomputed();
CharMatcher padding() {
return delegate.padding();
int maxEncodedSize(int bytes) {
int unseparatedSize = delegate.maxEncodedSize(bytes);
return unseparatedSize + separator.length()
* divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
ByteOutput encodingStream(final CharOutput output) {
return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
int maxDecodedSize(int chars) {
return delegate.maxDecodedSize(chars);
ByteInput decodingStream(final CharInput input) {
return delegate.decodingStream(ignoringInput(input, separatorChars));
public BaseEncoding omitPadding() {
return delegate.omitPadding().withSeparator(separator, afterEveryChars);
public BaseEncoding withPadChar(char padChar) {
return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
public BaseEncoding withSeparator(String separator, int afterEveryChars) {
throw new UnsupportedOperationException("Already have a separator");
public BaseEncoding upperCase() {
return delegate.upperCase().withSeparator(separator, afterEveryChars);
public BaseEncoding lowerCase() {
return delegate.lowerCase().withSeparator(separator, afterEveryChars);
public String toString() {
return delegate.toString() +
".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";