/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.airlift.compress.lzo;
import org.apache.hadoop.io.compress.CompressionInputStream;
import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.zip.Adler32;
import java.util.zip.Checksum;
import static io.airlift.compress.lzo.LzoConstants.SIZE_OF_LONG;
import static io.airlift.compress.lzo.LzopCodec.LZOP_MAGIC;
import static io.airlift.compress.lzo.LzopCodec.LZOP_IMPLEMENTATION_VERSION;
import static io.airlift.compress.lzo.LzopCodec.LZO_1X_VARIANT;
import static java.lang.String.format;
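/**
 * A {@link CompressionInputStream} that reads the lzop container format: it validates
 * the lzop file header, then decompresses each LZO 1X block and exposes the
 * uncompressed bytes as a plain stream.
 * <p>
 * A minimal usage sketch (the class is package-private, so direct construction assumes
 * caller code in this package; the file name is illustrative and the block size assumes
 * lzop's default of 256 KB):
 * <pre>{@code
 * try (InputStream file = new FileInputStream("data.lzo");
 *         InputStream in = new HadoopLzopInputStream(file, 256 * 1024)) {
 *     byte[] buffer = new byte[8192];
 *     int n;
 *     while ((n = in.read(buffer, 0, buffer.length)) != -1) {
 *         // process buffer[0..n)
 *     }
 * }
 * }</pre>
 */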
class HadoopLzopInputStream
extends CompressionInputStream
{
private static final int LZO_IMPLEMENTATION_VERSION = 0x2060;
private final LzoDecompressor decompressor = new LzoDecompressor();
private final InputStream in;
private final byte[] uncompressedChunk;
private int uncompressedLength;
private int uncompressedOffset;
private boolean finished;
private byte[] compressed = new byte[0];
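/**
 * Reads and validates the lzop file header from {@code in}, leaving the stream
 * positioned at the first block.
 *
 * @param maxUncompressedLength the largest uncompressed block size expected in the
 *        stream; the internal chunk buffer is sized from this value
 */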
public HadoopLzopInputStream(InputStream in, int maxUncompressedLength)
throws IOException
{
super(in);
this.in = in;
// over-allocate the buffer by SIZE_OF_LONG, which makes decompression easier
uncompressedChunk = new byte[maxUncompressedLength + SIZE_OF_LONG];
byte[] magic = new byte[LZOP_MAGIC.length];
readInput(magic, 0, magic.length);
if (!Arrays.equals(magic, LZOP_MAGIC)) {
throw new IOException("Not an LZOP file");
}
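// fixed-size portion of the lzop header (25 bytes): lzop version (2), lzo library
// version (2), lzop format version (2), method (1), level (1), flags (4),
// output file mode (4), modified time (4), time zone offset (4), file name length (1)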
byte[] header = new byte[25];
readInput(header, 0, header.length);
ByteArrayInputStream headerStream = new ByteArrayInputStream(header);
// lzop version: ignored
readBigEndianShort(headerStream);
// lzo version
int lzoVersion = readBigEndianShort(headerStream);
if (lzoVersion > LZO_IMPLEMENTATION_VERSION) {
throw new IOException(format("Unsupported LZO version 0x%08X", lzoVersion));
}
// lzop version of the format
int lzopCompatibility = readBigEndianShort(headerStream);
if (lzopCompatibility > LZOP_IMPLEMENTATION_VERSION) {
throw new IOException(format("Unsupported LZOP version 0x%08X", lzopCompatibility));
}
// variant: must be LZO 1X
int variant = headerStream.read();
if (variant != LZO_1X_VARIANT) {
throw new IOException(format("Unsupported LZO variant %s", variant));
}
// level: ignored
headerStream.read();
// flags: none supported
int flags = readBigEndianInt(headerStream);
if (flags != 0) {
throw new IOException(format("Unsupported LZO flags %s", flags));
}
// output file mode: ignored
readBigEndianInt(headerStream);
// output file modified time: ignored
readBigEndianInt(headerStream);
// output file time zone offset: ignored
readBigEndianInt(headerStream);
// output file name: ignored
int fileNameLength = headerStream.read();
byte[] fileName = new byte[fileNameLength];
readInput(fileName, 0, fileName.length);
// verify header checksum
int headerChecksumValue = readBigEndianInt(in);
Checksum headerChecksum = new Adler32();
headerChecksum.update(header, 0, header.length);
headerChecksum.update(fileName, 0, fileName.length);
if (headerChecksumValue != (int) headerChecksum.getValue()) {
throw new IOException("Invalid header checksum");
}
}
@Override
public int read()
throws IOException
{
if (finished) {
return -1;
}
while (uncompressedOffset >= uncompressedLength) {
int compressedLength = bufferCompressedData();
if (finished) {
return -1;
}
decompress(compressedLength, uncompressedChunk, 0, uncompressedChunk.length);
}
return uncompressedChunk[uncompressedOffset++] & 0xFF;
}
@Override
public int read(byte[] output, int offset, int length)
throws IOException
{
if (finished) {
return -1;
}
while (uncompressedOffset >= uncompressedLength) {
int compressedLength = bufferCompressedData();
if (finished) {
return -1;
}
// favor writing directly to user buffer to avoid extra copy
if (length >= uncompressedLength) {
decompress(compressedLength, output, offset, length);
uncompressedOffset = uncompressedLength;
return uncompressedLength;
}
decompress(compressedLength, uncompressedChunk, 0, uncompressedChunk.length);
}
int size = Math.min(length, uncompressedLength - uncompressedOffset);
System.arraycopy(uncompressedChunk, uncompressedOffset, output, offset, size);
uncompressedOffset += size;
return size;
}
@Override
public void resetState()
throws IOException
{
uncompressedLength = 0;
uncompressedOffset = 0;
finished = false;
}
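/**
 * Reads the next block header: a big-endian uncompressed length followed by a
 * big-endian compressed length. An uncompressed length of zero is the lzop
 * end-of-stream marker and sets {@code finished}.
 *
 * @return the compressed length of the next block, or -1 if the end of the stream
 *         was reached
 */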
private int bufferCompressedData()
throws IOException
{
uncompressedOffset = 0;
uncompressedLength = readBigEndianInt(in);
if (uncompressedLength == -1) {
// an lzop file must end with a block whose uncompressed length is 0,
// so a clean EOF here means the stream is truncated
throw new EOFException("encountered EOF while reading block data");
}
if (uncompressedLength == 0) {
finished = true;
return -1;
}
int compressedLength = readBigEndianInt(in);
if (compressedLength == -1) {
throw new EOFException("encountered EOF while reading block data");
}
return compressedLength;
}
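/**
 * Reads one block of {@code compressedLength} bytes and writes its uncompressed form
 * into {@code output}. A block whose compressed length equals its uncompressed length
 * is stored without compression and is copied through directly.
 */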
private void decompress(int compressedLength, byte[] output, int outputOffset, int outputLength)
throws IOException
{
if (uncompressedLength == compressedLength) {
readInput(output, outputOffset, compressedLength);
}
else {
if (compressed.length < compressedLength) {
// over-allocate the buffer by SIZE_OF_LONG, which makes decompression easier
compressed = new byte[compressedLength + SIZE_OF_LONG];
}
readInput(compressed, 0, compressedLength);
int actualUncompressedLength = decompressor.decompress(compressed, 0, compressedLength, output, outputOffset, outputLength);
if (actualUncompressedLength != uncompressedLength) {
throw new IOException("Decompressor did not decompress the entire block");
}
}
}
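// reads exactly "length" bytes, looping over short reads; throws EOFException if the
// underlying stream ends first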
private void readInput(byte[] buffer, int offset, int length)
throws IOException
{
while (length > 0) {
int size = in.read(buffer, offset, length);
if (size == -1) {
throw new EOFException("encountered EOF while reading block data");
}
offset += size;
length -= size;
}
}
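// the big-endian readers return -1 only when the stream ends cleanly before the first
// byte; ending mid-value is reported as a truncated stream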
private static int readBigEndianShort(InputStream in)
throws IOException
{
int b1 = in.read();
if (b1 < 0) {
return -1;
}
int b2 = in.read();
// if the second byte is negative, the stream is truncated
if (b2 < 0) {
throw new IOException("Stream is truncated");
}
return (b1 << 8) + b2;
}
private static int readBigEndianInt(InputStream in)
throws IOException
{
int b1 = in.read();
if (b1 < 0) {
return -1;
}
int b2 = in.read();
int b3 = in.read();
int b4 = in.read();
// if any of the remaining bytes is negative, the stream is truncated
if ((b2 | b3 | b4) < 0) {
throw new IOException("Stream is truncated");
}
return (b1 << 24) + (b2 << 16) + (b3 << 8) + b4;
}
}