io.airlift.compress.snappy.SnappyHadoopInputStream Maven / Gradle / Ivy
The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.airlift.compress.snappy;
import io.airlift.compress.hadoop.HadoopInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import static io.airlift.compress.snappy.SnappyConstants.SIZE_OF_LONG;
class SnappyHadoopInputStream
extends HadoopInputStream
{
private final SnappyDecompressor decompressor = new SnappyDecompressor();
private final InputStream in;
private int uncompressedBlockLength;
private byte[] uncompressedChunk = new byte[0];
private int uncompressedChunkOffset;
private int uncompressedChunkLength;
private byte[] compressed = new byte[0];
public SnappyHadoopInputStream(InputStream in)
{
this.in = in;
}
@Override
public int read()
throws IOException
{
if (uncompressedChunkOffset >= uncompressedChunkLength) {
readNextChunk(uncompressedChunk, 0, uncompressedChunk.length);
if (uncompressedChunkLength == 0) {
return -1;
}
}
return uncompressedChunk[uncompressedChunkOffset++] & 0xFF;
}
@Override
public int read(byte[] output, int offset, int length)
throws IOException
{
if (uncompressedChunkOffset >= uncompressedChunkLength) {
boolean directDecompress = readNextChunk(output, offset, length);
if (uncompressedChunkLength == 0) {
return -1;
}
if (directDecompress) {
uncompressedChunkOffset += uncompressedChunkLength;
return uncompressedChunkLength;
}
}
int size = Math.min(length, uncompressedChunkLength - uncompressedChunkOffset);
System.arraycopy(uncompressedChunk, uncompressedChunkOffset, output, offset, size);
uncompressedChunkOffset += size;
return size;
}
@Override
public void resetState()
{
uncompressedBlockLength = 0;
uncompressedChunkOffset = 0;
uncompressedChunkLength = 0;
}
@Override
public void close()
throws IOException
{
in.close();
}
private boolean readNextChunk(byte[] userBuffer, int userOffset, int userLength)
throws IOException
{
uncompressedBlockLength -= uncompressedChunkOffset;
uncompressedChunkOffset = 0;
uncompressedChunkLength = 0;
while (uncompressedBlockLength == 0) {
uncompressedBlockLength = readBigEndianInt();
if (uncompressedBlockLength == -1) {
uncompressedBlockLength = 0;
return false;
}
}
int compressedChunkLength = readBigEndianInt();
if (compressedChunkLength == -1) {
return false;
}
if (compressed.length < compressedChunkLength) {
// over allocate buffer which makes decompression easier
compressed = new byte[compressedChunkLength + SIZE_OF_LONG];
}
readInput(compressedChunkLength, compressed);
uncompressedChunkLength = SnappyDecompressor.getUncompressedLength(compressed, 0);
if (uncompressedChunkLength > uncompressedBlockLength) {
throw new IOException("Chunk uncompressed size is greater than block size");
}
boolean directUncompress = true;
if (uncompressedChunkLength > userLength) {
if (uncompressedChunk.length < uncompressedChunkLength) {
// over allocate buffer which makes decompression easier
uncompressedChunk = new byte[uncompressedChunkLength + SIZE_OF_LONG];
}
directUncompress = false;
userBuffer = uncompressedChunk;
userOffset = 0;
userLength = uncompressedChunk.length;
}
int bytes = decompressor.decompress(compressed, 0, compressedChunkLength, userBuffer, userOffset, userLength);
if (uncompressedChunkLength != bytes) {
throw new IOException("Expected to read " + uncompressedChunkLength + " bytes, but data only contained " + bytes + " bytes");
}
return directUncompress;
}
private void readInput(int length, byte[] buffer)
throws IOException
{
int offset = 0;
while (offset < length) {
int size = in.read(buffer, offset, length - offset);
if (size == -1) {
throw new EOFException("encountered EOF while reading block data");
}
offset += size;
}
}
private int readBigEndianInt()
throws IOException
{
int b1 = in.read();
if (b1 < 0) {
return -1;
}
int b2 = in.read();
int b3 = in.read();
int b4 = in.read();
// If any of the other bits are negative, the stream it truncated
if ((b2 | b3 | b4) < 0) {
throw new IOException("Stream is truncated");
}
return ((b1 << 24) + (b2 << 16) + (b3 << 8) + (b4));
}
}