// com.siashan.toolkit.crypt.binary.BaseNCodec - Maven / Gradle / Ivy

// (Go to download)
package com.siashan.toolkit.crypt.binary;

import com.siashan.toolkit.crypt.*;
import com.siashan.toolkit.crypt.util.StringUtils;

import java.util.Arrays;
import java.util.Objects;

/**
 * Abstract base class for Base-N encoders and decoders (for example Base32 and Base64).
 *
 * <p>
 * All instance fields of this class are final. The mutable state of an encode or decode operation is
 * kept in a separate {@link Context} object that is passed to every method that needs it.
 * </p>
 *
 * @author siashan
 * @since v1.0.7
 */
public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * Holds the working state of a single encode or decode operation, so that the codec classes
     * themselves do not have to carry mutable state.
     *
     * <p>
     * This class is not itself thread-safe; each thread must allocate its own instance.
     * </p>
     *
     * @since 1.0.7
     */
    static class Context {

        /**
         * Placeholder for the bytes currently being processed by the Base-N logic.
         * Bitwise operations store into and extract from this variable while encoding or decoding.
         */
        int ibitWorkArea;

        /**
         * Placeholder for the bytes currently being processed by the Base-N logic.
         * Bitwise operations store into and extract from this variable while encoding or decoding.
         */
        long lbitWorkArea;

        /**
         * Buffer for streaming. {@code null} until first allocated, and again once fully read.
         */
        byte[] buffer;

        /**
         * Position in the buffer where the next byte should be written.
         */
        int pos;

        /**
         * Position in the buffer from which the next byte should be read.
         */
        int readPos;

        /**
         * Flag indicating that EOF has been reached. Once EOF has been reached, this object becomes
         * useless and must be thrown away.
         */
        boolean eof;

        /**
         * Tracks how many characters have been written to the current line. Only used when encoding,
         * to make sure each encoded line never exceeds lineLength (if lineLength &gt; 0).
         */
        int currentLinePos;

        /**
         * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads
         * when decoding. This variable helps track that.
         */
        int modulus;

        Context() {
        }

        /**
         * Returns a String useful for debugging (especially within a debugger).
         *
         * @return a String listing every field of this context.
         */
        @SuppressWarnings("boxing") // OK to ignore boxing here
        @Override
        public String toString() {
            return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
                    "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
                    currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
        }
    }

    /**
     * EOF marker. Passed as the length argument to signal end of input, and returned by
     * {@link #readResults(byte[], int, int, Context)} once the context has reached EOF.
     *
     * @since 1.0.7
     */
    static final int EOF = -1;

    /**
     * MIME chunk size per RFC 2045 section 6.8.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters,
     * including any equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    public static final int MIME_CHUNK_SIZE = 76;

    /**
     * PEM chunk size per RFC 1421 section 4.3.2.4.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters,
     * including any equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc1421.txt">RFC 1421 section 4.3.2.4</a>
     */
    public static final int PEM_CHUNK_SIZE = 64;

    /** Factor by which the buffer capacity is multiplied when it has to grow. */
    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

    /**
     * Defines the default buffer size - currently {@value}
     * - must be large enough for at least one encoded block + separator.
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /**
     * The maximum size buffer to allocate.
     *
     * <p>This is set to the same size used in the JDK {@code java.util.ArrayList}:</p>
     * <blockquote>
     * Some VMs reserve some header words in an array.
     * Attempts to allocate larger arrays may result in
     * OutOfMemoryError: Requested array size exceeds VM limit.
     * </blockquote>
     */
    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /** Mask used to extract 8 bits, used in decoding bytes. */
    protected static final int MASK_8BITS = 0xff;

    /**
     * Byte used to pad output.
     */
    protected static final byte PAD_DEFAULT = '='; // Allow static access to default

    /**
     * The default decoding policy.
     */
    protected static final CodecPolicy DECODING_POLICY_DEFAULT = CodecPolicy.LENIENT;

    /**
     * Chunk separator per RFC 2045 section 2.1.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};

    /**
     * Compares two {@code int} values numerically, treating the values as unsigned.
     * Taken from JDK 1.8.
     *
     * @param  x the first {@code int} to compare
     * @param  y the second {@code int} to compare
     * @return the value {@code 0} if {@code x == y}; a value less than {@code 0} if {@code x < y}
     *         as unsigned values; and a value greater than {@code 0} if {@code x > y} as unsigned
     *         values
     */
    private static int compareUnsigned(final int x, final int y) {
        // Adding MIN_VALUE flips the sign bit, which maps unsigned order onto signed order.
        return Integer.compare(x + Integer.MIN_VALUE, y + Integer.MIN_VALUE);
    }

    /**
     * Creates a positive capacity at least as large as the minimum required capacity.
     * If the minimum capacity is negative then this throws an OutOfMemoryError as no array
     * can be allocated.
     *
     * @param minCapacity the minimum capacity
     * @return the capacity
     * @throws OutOfMemoryError if the {@code minCapacity} is negative
     */
    private static int createPositiveCapacity(final int minCapacity) {
        if (minCapacity < 0) {
            // overflow: report the requested size as the unsigned value it represents
            throw new OutOfMemoryError("Unable to allocate array size: " + (minCapacity & 0xffffffffL));
        }
        // This is called when we require buffer expansion to a very big array.
        // Use the conservative maximum buffer size if possible, otherwise the biggest required.
        //
        // Note: In this situation JDK 1.8 java.util.ArrayList returns Integer.MAX_VALUE.
        // This excludes some VMs that can exceed MAX_BUFFER_SIZE but not allocate a full
        // Integer.MAX_VALUE length array.
        // The result is that we may have to allocate an array of this size more than once if
        // the capacity must be expanded again.
        return (minCapacity > MAX_BUFFER_SIZE) ?
            minCapacity :
            MAX_BUFFER_SIZE;
    }

    /**
     * Gets a copy of the chunk separator per RFC 2045 section 2.1.
     *
     * @return a fresh copy of the chunk separator, so callers cannot modify the shared constant
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     * @since 1.0.7
     */
    public static byte[] getChunkSeparator() {
        return CHUNK_SEPARATOR.clone();
    }

    /**
     * Checks if a byte value is whitespace or not.
     * Whitespace is taken to mean: space, tab, CR, LF.
     *
     * @param byteToCheck
     *            the byte to check
     * @return true if the byte is whitespace, false otherwise
     */
    protected static boolean isWhiteSpace(final byte byteToCheck) {
        switch (byteToCheck) {
            case ' ' :
            case '\n' :
            case '\r' :
            case '\t' :
                return true;
            default :
                return false;
        }
    }

    /**
     * Increases the buffer by {@link #DEFAULT_BUFFER_RESIZE_FACTOR}, or to {@code minCapacity}
     * if that is larger, and stores the new buffer in the context.
     *
     * @param context the context to be used
     * @param minCapacity the minimum required capacity
     * @return the resized byte[] buffer (the existing contents are copied over)
     * @throws OutOfMemoryError if the {@code minCapacity} is negative
     */
    private static byte[] resizeBuffer(final Context context, final int minCapacity) {
        // Overflow-conscious code treats the min and new capacity as unsigned.
        final int oldCapacity = context.buffer.length;
        int newCapacity = oldCapacity * DEFAULT_BUFFER_RESIZE_FACTOR;
        if (compareUnsigned(newCapacity, minCapacity) < 0) {
            newCapacity = minCapacity;
        }
        if (compareUnsigned(newCapacity, MAX_BUFFER_SIZE) > 0) {
            newCapacity = createPositiveCapacity(minCapacity);
        }

        final byte[] b = new byte[newCapacity];
        System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
        context.buffer = b;
        return b;
    }

    /** Pad byte. Kept as an instance variable in case it needs to vary later. */
    protected final byte pad;

    /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32. */
    private final int unencodedBlockSize;

    /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32. */
    private final int encodedBlockSize;

    /**
     * Chunk size for encoding. Not used when decoding.
     * A value of zero or less implies no chunking of the encoded data.
     * Rounded down to the nearest multiple of encodedBlockSize.
     */
    protected final int lineLength;

    /**
     * Size of the chunk separator. Not used unless {@link #lineLength} &gt; 0.
     */
    private final int chunkSeparatorLength;

    /**
     * Defines the decoding behavior when the input bytes contain leftover trailing bits that
     * cannot be created by a valid encoding. These can be bits that are unused from the final
     * character or entire characters. The default mode is lenient decoding; use
     * {@code CodecPolicy.STRICT} to enable strict decoding.
     *
     * <ul>
     * <li>Lenient: any trailing bits are composed into 8-bit bytes where possible.
     *     The remainder are discarded.</li>
     * <li>Strict: the decoding is expected to raise an {@link IllegalArgumentException} if trailing
     *     bits are not part of a valid encoding. Any unused bits from the final character must be
     *     zero. Impossible counts of entire final characters are not allowed.</li>
     * </ul>
     *
     * <p>
     * When strict decoding is enabled it is expected that the decoded bytes will be re-encoded
     * to a byte array that matches the original, i.e. no changes occur on the final
     * character. This requires that the input bytes use the same padding and alphabet
     * as the encoder.
     * </p>
     */
    private final CodecPolicy decodingPolicy;

    /**
     * Creates a codec using {@link #PAD_DEFAULT} as the pad byte and the default decoding policy.
     *
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                         final int lineLength, final int chunkSeparatorLength) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
    }

    /**
     * Creates a codec using the given pad byte and the default decoding policy.
     *
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     * @param pad byte used as padding byte.
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                         final int lineLength, final int chunkSeparatorLength, final byte pad) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, pad, DECODING_POLICY_DEFAULT);
    }

    /**
     * Creates a codec using the given pad byte and decoding policy.
     *
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
     * Chunking is only enabled when both {@code lineLength} and {@code chunkSeparatorLength}
     * are greater than zero; otherwise the stored line length is zero.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     * @param pad byte used as padding byte.
     * @param decodingPolicy the decoding policy; must not be {@code null}.
     * @throws NullPointerException if {@code decodingPolicy} is {@code null}
     * @since 1.0.7
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                         final int lineLength, final int chunkSeparatorLength, final byte pad, final CodecPolicy decodingPolicy) {
        this.unencodedBlockSize = unencodedBlockSize;
        this.encodedBlockSize = encodedBlockSize;
        final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
        // Integer division truncates, which rounds the line length down to a whole number of blocks.
        this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
        this.chunkSeparatorLength = chunkSeparatorLength;
        this.pad = pad;
        this.decodingPolicy = Objects.requireNonNull(decodingPolicy, "codecPolicy");
    }

    /**
     * Returns the amount of buffered data available for reading.
     *
     * @param context the context to be used
     * @return the amount of buffered data available for reading; zero if no buffer is allocated.
     */
    int available(final Context context) {  // package protected for access from I/O streams
        return context.buffer != null ? context.pos - context.readPos : 0;
    }

    /**
     * Tests a given byte array to see if it contains any characters within the alphabet or the pad byte.
     *
     * <p>Intended for checking line-ending arrays.</p>
     *
     * @param arrayOctet
     *            byte array to test; may be {@code null}
     * @return {@code true} if any byte is a valid character in the alphabet or is the pad byte;
     *         {@code false} otherwise (including for a {@code null} array)
     */
    protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
        if (arrayOctet == null) {
            return false;
        }
        for (final byte element : arrayOctet) {
            if (pad == element || isInAlphabet(element)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decodes a byte[] containing characters in the Base-N alphabet.
     *
     * @param pArray
     *            a byte array containing Base-N character data
     * @return a byte array containing binary data; a {@code null} or empty input is returned as-is
     */
    @Override
    public byte[] decode(final byte[] pArray) {
        if (pArray == null || pArray.length == 0) {
            return pArray;
        }
        final Context context = new Context();
        decode(pArray, 0, pArray.length, context);
        decode(pArray, 0, EOF, context); // Notify decoder of EOF.
        final byte[] result = new byte[context.pos];
        readResults(result, 0, result.length, context);
        return result;
    }

    // package protected for access from I/O streams
    abstract void decode(byte[] pArray, int i, int length, Context context);

    /**
     * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the
     * requirements of the Decoder interface, and will throw a DecoderException if the supplied
     * object is not of type byte[] or String.
     *
     * @param obj
     *            object to decode
     * @return an object (of type byte[]) containing the binary data which corresponds to the byte[]
     *         or String supplied.
     * @throws DecoderException
     *             if the parameter supplied is not of type byte[] or String
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        } else if (obj instanceof String) {
            return decode((String) obj);
        } else {
            throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
        }
    }

    /**
     * Decodes a String containing characters in the Base-N alphabet.
     *
     * @param pArray
     *            a String containing Base-N character data
     * @return a byte array containing binary data
     */
    public byte[] decode(final String pArray) {
        return decode(StringUtils.getBytesUtf8(pArray));
    }

    /**
     * Encodes a byte[] containing binary data into a byte[] containing characters in the alphabet.
     *
     * @param pArray
     *            a byte array containing binary data
     * @return a byte array containing only Base-N alphabet character data; a {@code null} or empty
     *         input is returned as-is
     */
    @Override
    public byte[] encode(final byte[] pArray) {
        if (pArray == null || pArray.length == 0) {
            return pArray;
        }
        return encode(pArray, 0, pArray.length);
    }

    /**
     * Encodes a sub-range of a byte[] containing binary data into a byte[] containing
     * characters in the alphabet.
     *
     * @param pArray
     *            a byte array containing binary data
     * @param offset
     *            initial offset of the subarray.
     * @param length
     *            length of the subarray.
     * @return a byte array containing only Base-N alphabet character data; a {@code null} or empty
     *         input array is returned as-is
     */
    public byte[] encode(final byte[] pArray, final int offset, final int length) {
        if (pArray == null || pArray.length == 0) {
            return pArray;
        }
        final Context context = new Context();
        encode(pArray, offset, length, context);
        encode(pArray, offset, EOF, context); // Notify encoder of EOF.
        final byte[] buf = new byte[context.pos - context.readPos];
        readResults(buf, 0, buf.length, context);
        return buf;
    }

    // package protected for access from I/O streams
    abstract void encode(byte[] pArray, int i, int length, Context context);

    /**
     * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the
     * requirements of the Encoder interface, and will throw an EncoderException if the supplied
     * object is not of type byte[].
     *
     * @param obj
     *            object to encode
     * @return an object (of type byte[]) containing the Base-N encoded data which corresponds to the
     *         byte[] supplied.
     * @throws EncoderException
     *             if the parameter supplied is not of type byte[]
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (!(obj instanceof byte[])) {
            throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
        }
        return encode((byte[]) obj);
    }

    /**
     * Encodes a byte[] containing binary data into a String containing characters in the
     * appropriate alphabet. Uses UTF-8 encoding.
     *
     * @param pArray a byte array containing binary data
     * @return a String containing only character data in the appropriate alphabet.
     */
    public String encodeAsString(final byte[] pArray){
        return StringUtils.newStringUtf8(encode(pArray));
    }

    /**
     * Encodes a byte[] containing binary data into a String containing characters in the Base-N
     * alphabet. Uses UTF-8 encoding.
     *
     * @param pArray
     *            a byte array containing binary data
     * @return a String containing only Base-N character data
     */
    public String encodeToString(final byte[] pArray) {
        return StringUtils.newStringUtf8(encode(pArray));
    }

    /**
     * Ensures that the buffer has room for {@code size} more bytes, allocating or growing it
     * as required.
     *
     * @param size minimum spare space required
     * @param context the context to be used
     * @return the buffer
     */
    protected byte[] ensureBufferSize(final int size, final Context context){
        if (context.buffer == null) {
            context.buffer = new byte[Math.max(size, getDefaultBufferSize())];
            context.pos = 0;
            context.readPos = 0;

            // Overflow-conscious:
            // x + y > z  ==  x + y - z > 0
        } else if (context.pos + size - context.buffer.length > 0) {
            return resizeBuffer(context, context.pos + size);
        }
        return context.buffer;
    }

    /**
     * Returns the decoding behavior policy.
     *
     * <p>
     * The default is lenient. If the decoding policy is strict, then decoding is expected to raise an
     * {@link IllegalArgumentException} if trailing bits are not part of a valid encoding. Lenient
     * decoding composes trailing bits into 8-bit bytes and discards the remainder.
     * </p>
     *
     * @return the decoding policy this codec was constructed with
     */
    public CodecPolicy getCodecPolicy() {
        return decodingPolicy;
    }

    /**
     * Gets the default buffer size. Can be overridden.
     *
     * @return the default buffer size.
     */
    protected int getDefaultBufferSize() {
        return DEFAULT_BUFFER_SIZE;
    }

    /**
     * Calculates the amount of space needed to encode the supplied array.
     *
     * @param pArray byte[] array which will later be encoded; must not be {@code null}
     *
     * @return amount of space needed to encode the supplied array.
     */
    public long getEncodedLength(final byte[] pArray) {
        // Calculate non-chunked size - rounded up to allow for padding.
        // The length is widened to long BEFORE the rounding addition: adding the block size to an
        // int length close to Integer.MAX_VALUE would otherwise wrap around to a negative value.
        long len = (((long) pArray.length + unencodedBlockSize - 1) / unencodedBlockSize) * encodedBlockSize;
        if (lineLength > 0) { // We're using chunking
            // Round up to nearest multiple
            len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength;
        }
        return len;
    }

    /**
     * Returns true if this object has buffered data for reading.
     *
     * @param context the context to be used
     * @return true if a buffer is currently allocated in the context.
     */
    boolean hasData(final Context context) {  // package protected for access from I/O streams
        return context.buffer != null;
    }

    /**
     * Returns whether or not the {@code value} is in the current alphabet.
     * Does not allow whitespace or pad.
     *
     * @param value the value to test
     *
     * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);

    /**
     * Tests a given byte array to see if it contains only valid characters within the alphabet.
     * The method optionally treats whitespace and pad as valid.
     *
     * @param arrayOctet byte array to test
     * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed
     *
     * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is
     *         empty; {@code false}, otherwise
     */
    public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
        for (final byte octet : arrayOctet) {
            // A byte outside the alphabet is only tolerated when allowWSPad is set and the byte
            // is either the pad byte or whitespace.
            if (!isInAlphabet(octet) &&
                    (!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests a given String to see if it contains only valid characters within the alphabet.
     * The method treats whitespace and PAD as valid.
     *
     * @param basen String to test
     * @return {@code true} if all characters in the String are valid characters in the alphabet or if
     *         the String is empty; {@code false}, otherwise
     * @see #isInAlphabet(byte[], boolean)
     */
    public boolean isInAlphabet(final String basen) {
        return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
    }

    /**
     * Returns true if decoding behavior is strict. Strict decoding is expected to raise an
     * {@link IllegalArgumentException} if trailing bits are not part of a valid encoding.
     *
     * <p>
     * The default is false for lenient decoding. Lenient decoding composes trailing bits into 8-bit
     * bytes and discards the remainder.
     * </p>
     *
     * @return true if using strict decoding
     */
    public boolean isStrictDecoding() {
        return decodingPolicy == CodecPolicy.STRICT;
    }

    /**
     * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a
     * maximum of bAvail bytes. Returns how many bytes were actually extracted.
     *
     * <p>Package protected for access from I/O streams.</p>
     *
     * @param b
     *            byte[] array to extract the buffered data into.
     * @param bPos
     *            position in byte[] array to start extraction at.
     * @param bAvail
     *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
     * @param context
     *            the context to be used
     * @return the number of bytes successfully extracted into the provided byte[] array; when no
     *         buffer is allocated, {@link #EOF} if the context has reached EOF and 0 otherwise.
     */
    int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
        if (context.buffer != null) {
            final int len = Math.min(available(context), bAvail);
            System.arraycopy(context.buffer, context.readPos, b, bPos, len);
            context.readPos += len;
            if (context.readPos >= context.pos) {
                context.buffer = null; // so hasData() will return false, and this method can return -1
            }
            return len;
        }
        return context.eof ? EOF : 0;
    }
}