All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.airlift.compress.snappy.SnappyFramedInputStream Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.airlift.compress.snappy;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

import static io.airlift.compress.snappy.SnappyFramedOutputStream.MAX_BLOCK_SIZE;
import static java.lang.Math.min;

/**
 * Implements the x-snappy-framed as an {@link InputStream}.
 */
public final class SnappyFramedInputStream
        extends InputStream
{
    private final SnappyDecompressor decompressor = new SnappyDecompressor();

    private final InputStream in;
    private final byte[] frameHeader;
    private final boolean verifyChecksums;

    /**
     * A single frame read from the underlying {@link InputStream}.
     */
    private byte[] input = new byte[0];
    /**
     * The decompressed data from {@link #input}.
     */
    private byte[] uncompressed = new byte[0];
    /**
     * Indicates if this instance has been closed.
     */
    private boolean closed;
    /**
     * Indicates if we have reached the EOF on {@link #in}.
     */
    private boolean eof;
    /**
     * The position in {@link #input} to read to.
     */
    private int valid;
    /**
     * The next position to read from {@link #buffer}.
     */
    private int position;
    /**
     * Buffer is a reference to the real buffer of uncompressed data for the
     * current block: uncompressed if the block is compressed, or input if it is
     * not.
     */
    private byte[] buffer;

    public SnappyFramedInputStream(InputStream in)
            throws IOException
    {
        this(in, true);
    }

    public SnappyFramedInputStream(InputStream in, boolean verifyChecksums)
            throws IOException
    {
        this.in = in;
        this.verifyChecksums = verifyChecksums;
        allocateBuffersBasedOnSize(MAX_BLOCK_SIZE + 5);
        this.frameHeader = new byte[4];

        // stream must begin with stream header
        byte[] actualHeader = new byte[SnappyFramed.HEADER_BYTES.length];

        int read = SnappyInternalUtils.readBytes(in, actualHeader, 0, actualHeader.length);
        if (read < SnappyFramed.HEADER_BYTES.length) {
            throw new EOFException("encountered EOF while reading stream header");
        }
        if (!Arrays.equals(SnappyFramed.HEADER_BYTES, actualHeader)) {
            throw new IOException("invalid stream header");
        }
    }

    @Override
    public int read()
            throws IOException
    {
        if (closed) {
            return -1;
        }
        if (!ensureBuffer()) {
            return -1;
        }
        return buffer[position++] & 0xFF;
    }

    @Override
    public int read(byte[] output, int offset, int length)
            throws IOException
    {
        SnappyInternalUtils.checkNotNull(output, "output is null");
        SnappyInternalUtils.checkPositionIndexes(offset, offset + length, output.length);
        if (closed) {
            throw new IOException("Stream is closed");
        }

        if (length == 0) {
            return 0;
        }
        if (!ensureBuffer()) {
            return -1;
        }

        int size = min(length, available());
        System.arraycopy(buffer, position, output, offset, size);
        position += size;
        return size;
    }

    @Override
    public int available()
            throws IOException
    {
        if (closed) {
            return 0;
        }
        return valid - position;
    }

    @Override
    public void close()
            throws IOException
    {
        try {
            in.close();
        }
        finally {
            if (!closed) {
                closed = true;
            }
        }
    }

    private boolean ensureBuffer()
            throws IOException
    {
        if (available() > 0) {
            return true;
        }
        if (eof) {
            return false;
        }

        if (!readBlockHeader()) {
            eof = true;
            return false;
        }

        // get action based on header
        FrameMetaData frameMetaData = getFrameMetaData(frameHeader);

        if (FrameAction.SKIP == frameMetaData.frameAction) {
            SnappyInternalUtils.skip(in, frameMetaData.length);
            return ensureBuffer();
        }

        if (frameMetaData.length > input.length) {
            allocateBuffersBasedOnSize(frameMetaData.length);
        }

        int actualRead = SnappyInternalUtils.readBytes(in, input, 0, frameMetaData.length);
        if (actualRead != frameMetaData.length) {
            throw new EOFException("unexpectd EOF when reading frame");
        }

        FrameData frameData = getFrameData(input);

        if (FrameAction.UNCOMPRESS == frameMetaData.frameAction) {
            int uncompressedLength = SnappyDecompressor.getUncompressedLength(input, frameData.offset);

            if (uncompressedLength > uncompressed.length) {
                uncompressed = new byte[uncompressedLength];
            }

            this.valid = decompressor.decompress(input, frameData.offset, actualRead - frameData.offset, uncompressed, 0, uncompressed.length);
            this.buffer = uncompressed;
            this.position = 0;
        }
        else {
            // we need to start reading at the offset
            this.position = frameData.offset;
            this.buffer = input;
            // valid is until the end of the read data, regardless of offset
            // indicating where we start
            this.valid = actualRead;
        }

        if (verifyChecksums) {
            int actualCrc32c = Crc32C.maskedCrc32c(buffer, position, valid - position);
            if (frameData.checkSum != actualCrc32c) {
                throw new IOException("Corrupt input: invalid checksum");
            }
        }

        return true;
    }

    private void allocateBuffersBasedOnSize(int size)
    {
        if (input.length < size) {
            input = new byte[size];
        }
        if (uncompressed.length < size) {
            uncompressed = new byte[size];
        }
    }

    /**
     * Use the content of the frameHeader to describe what type of frame we have
     * and the action to take.
     */
    private static FrameMetaData getFrameMetaData(byte[] frameHeader)
            throws IOException
    {
        int length = (frameHeader[1] & 0xFF);
        length |= (frameHeader[2] & 0xFF) << 8;
        length |= (frameHeader[3] & 0xFF) << 16;

        int minLength;
        FrameAction frameAction;
        int flag = frameHeader[0] & 0xFF;
        switch (flag) {
            case SnappyFramed.COMPRESSED_DATA_FLAG:
                frameAction = FrameAction.UNCOMPRESS;
                minLength = 5;
                break;
            case SnappyFramed.UNCOMPRESSED_DATA_FLAG:
                frameAction = FrameAction.RAW;
                minLength = 5;
                break;
            case SnappyFramed.STREAM_IDENTIFIER_FLAG:
                if (length != 6) {
                    throw new IOException("stream identifier chunk with invalid length: " + length);
                }
                frameAction = FrameAction.SKIP;
                minLength = 6;
                break;
            default:
                // Reserved unskippable chunks (chunk types 0x02-0x7f)
                if (flag <= 0x7f) {
                    throw new IOException("unsupported unskippable chunk: " + Integer.toHexString(flag));
                }

                // all that is left is Reserved skippable chunks (chunk types 0x80-0xfe)
                frameAction = FrameAction.SKIP;
                minLength = 0;
        }

        if (length < minLength) {
            throw new IOException("invalid length: " + length + " for chunk flag: " + Integer.toHexString(flag));
        }

        return new FrameMetaData(frameAction, length);
    }

    /**
     * Extract frame data
     *
     * @param content The content of the of the frame. Content begins at index {@code 0}.
     * @return Metadata about the content of the frame.
     */
    private static FrameData getFrameData(byte[] content)
    {
        // crc is contained in the frame content
        int crc32c = (content[3] & 0xFF) << 24 |
                (content[2] & 0xFF) << 16 |
                (content[1] & 0xFF) << 8 |
                (content[0] & 0xFF);

        return new FrameData(crc32c, 4);
    }

    private boolean readBlockHeader()
            throws IOException
    {
        int read = SnappyInternalUtils.readBytes(in, frameHeader, 0, frameHeader.length);

        if (read == -1) {
            return false;
        }

        if (read < frameHeader.length) {
            throw new EOFException("encountered EOF while reading block header");
        }

        return true;
    }

    enum FrameAction
    {
        RAW, SKIP, UNCOMPRESS
    }

    public static final class FrameMetaData
    {
        final int length;
        final FrameAction frameAction;

        /**
         * @param frameAction
         * @param length
         */
        public FrameMetaData(FrameAction frameAction, int length)
        {
            this.frameAction = frameAction;
            this.length = length;
        }
    }

    public static final class FrameData
    {
        final int checkSum;
        final int offset;

        public FrameData(int checkSum, int offset)
        {
            this.checkSum = checkSum;
            this.offset = offset;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy