org.apache.cassandra.io.compress.CompressedRandomAccessReader Maven / Gradle / Ivy

The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.io.compress;

import java.io.*;
import java.nio.ByteBuffer;
import java.util.concurrent.ThreadLocalRandom;
import java.util.zip.Checksum;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.primitives.Ints;

import org.apache.cassandra.io.FSReadError;
import org.apache.cassandra.io.sstable.CorruptSSTableException;
import org.apache.cassandra.io.util.*;
import org.apache.cassandra.utils.memory.BufferPool;

/**
 * CompressedRandomAccessReader (CRAR) extends RandomAccessReader (RAR) to transparently uncompress
 * blocks from the file into RAR.buffer.  Most of the RAR "read bytes from the buffer, rebuffering
 * when necessary" machinery works unchanged after that.
 */
public class CompressedRandomAccessReader extends RandomAccessReader
{
    private final CompressionMetadata metadata;

    // we read the raw compressed bytes into this buffer, then move the uncompressed ones into super.buffer.
    private ByteBuffer compressed;

    // re-use a single CRC object across reads
    private final Checksum checksum;

    // raw checksum bytes
    private ByteBuffer checksumBytes;

    @VisibleForTesting
    public double getCrcCheckChance()
    {
        return metadata.parameters.getCrcCheckChance();
    }

    protected CompressedRandomAccessReader(Builder builder)
    {
        super(builder);
        this.metadata = builder.metadata;
        this.checksum = metadata.checksumType.newInstance();

        if (regions == null)
        {
            compressed = allocateBuffer(metadata.compressor().initialCompressedBufferLength(metadata.chunkLength()), bufferType);
            checksumBytes = ByteBuffer.wrap(new byte[4]);
        }
    }

    @Override
    protected void releaseBuffer()
    {
        try
        {
            if (buffer != null)
            {
                BufferPool.put(buffer);
                buffer = null;
            }
        }
        finally
        {
            // compressed will always be null when using mmap access mode (unlike in the parent, where buffer is set to a region)
            if (compressed != null)
            {
                BufferPool.put(compressed);
                compressed = null;
            }
        }
    }

    @Override
    protected void reBufferStandard()
    {
        try
        {
            long position = current();
            assert position < metadata.dataLength;

            CompressionMetadata.Chunk chunk = metadata.chunkFor(position);

            if (compressed.capacity() < chunk.length)
            {
                BufferPool.put(compressed);
                compressed = allocateBuffer(chunk.length, bufferType);
            }
            else
            {
                compressed.clear();
            }

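            // read exactly chunk.length compressed bytes from the chunk's offset in the data file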
            compressed.limit(chunk.length);
            if (channel.read(compressed, chunk.offset) != chunk.length)
                throw new CorruptBlockException(getPath(), chunk);

            compressed.flip();
            buffer.clear();

            if (getCrcCheckChance() >= 1d ||
                    getCrcCheckChance() > ThreadLocalRandom.current().nextDouble())
            {
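                // we reach here with probability crc_check_chance (1.0 = always validate);
                // the 4-byte checksum stored after the chunk on disk must match one freshly
                // computed over the compressed bytes just read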
                if (checksum(chunk) != (int) metadata.checksumType.of(compressed))
                    throw new CorruptBlockException(getPath(), chunk);

                compressed.rewind();
            }

            try
            {
                metadata.compressor().uncompress(compressed, buffer);
            }
            catch (IOException e)
            {
                throw new CorruptBlockException(getPath(), chunk, e);
            }
            finally
            {
                buffer.flip();
            }

            // the buffer offset is always chunk-aligned: buffer.capacity() is the chunk length,
            // a power of two, so masking off the low bits of position gives the chunk start
            bufferOffset = position & ~(buffer.capacity() - 1);
            buffer.position((int) (position - bufferOffset));
            // the length() can be provided at construction time, to override the true (uncompressed) length of the file;
            // this is permitted to occur within a compressed segment, so we truncate the buffer limit if we cross the imposed length
            if (bufferOffset + buffer.limit() > length())
                buffer.limit((int)(length() - bufferOffset));
        }
        catch (CorruptBlockException e)
        {
            // Make sure reader does not see stale data.
            buffer.position(0).limit(0);
            throw new CorruptSSTableException(e, getPath());
        }
        catch (IOException e)
        {
            throw new FSReadError(e, getPath());
        }
    }

    @Override
    protected void reBufferMmap()
    {
        try
        {
            long position = current();
            assert position < metadata.dataLength;

            CompressionMetadata.Chunk chunk = metadata.chunkFor(position);

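            // locate the mmap'd region covering this chunk, then window a duplicate of its
            // buffer over the chunk's compressed bytes (chunkOffset is relative to the region)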
            MmappedRegions.Region region = regions.floor(chunk.offset);
            long segmentOffset = region.bottom();
            int chunkOffset = Ints.checkedCast(chunk.offset - segmentOffset);
            ByteBuffer compressedChunk = region.buffer.duplicate(); // TODO: change to slice(chunkOffset) when we upgrade LZ4-java

            compressedChunk.position(chunkOffset).limit(chunkOffset + chunk.length);

            buffer.clear();

            if (getCrcCheckChance() >= 1d ||
                getCrcCheckChance() > ThreadLocalRandom.current().nextDouble())
            {
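                // computing the checksum advances the buffer position to the end of the chunk,
                // so after raising the limit, getInt() reads the stored 4-byte checksum that
                // immediately follows the chunk in the mapped region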
                int checksum = (int) metadata.checksumType.of(compressedChunk);
                compressedChunk.limit(compressedChunk.capacity());
                if (compressedChunk.getInt() != checksum)
                    throw new CorruptBlockException(getPath(), chunk);

                compressedChunk.position(chunkOffset).limit(chunkOffset + chunk.length);
            }

            try
            {
                metadata.compressor().uncompress(compressedChunk, buffer);
            }
            catch (IOException e)
            {
                throw new CorruptBlockException(getPath(), chunk, e);
            }
            finally
            {
                buffer.flip();
            }

            // the buffer offset is always chunk-aligned: buffer.capacity() is the chunk length,
            // a power of two, so masking off the low bits of position gives the chunk start
            bufferOffset = position & ~(buffer.capacity() - 1);
            buffer.position((int) (position - bufferOffset));
            // the length() can be provided at construction time, to override the true (uncompressed) length of the file;
            // this is permitted to occur within a compressed segment, so we truncate the buffer limit if we cross the imposed length
            if (bufferOffset + buffer.limit() > length())
                buffer.limit((int)(length() - bufferOffset));
        }
        catch (CorruptBlockException e)
        {
            // Make sure reader does not see stale data.
            buffer.position(0).limit(0);
            throw new CorruptSSTableException(e, getPath());
        }
    }

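    // reads the 4-byte checksum stored on disk immediately after the given compressed chunk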
    private int checksum(CompressionMetadata.Chunk chunk) throws IOException
    {
        long position = chunk.offset + chunk.length;
        checksumBytes.clear();
        if (channel.read(checksumBytes, position) != checksumBytes.capacity())
            throw new CorruptBlockException(getPath(), chunk);
        return checksumBytes.getInt(0);
    }

    @Override
    public long length()
    {
        return metadata.dataLength;
    }

    @Override
    public String toString()
    {
        return String.format("%s - chunk length %d, data length %d.", getPath(), metadata.chunkLength(), metadata.dataLength);
    }

    public static final class Builder extends RandomAccessReader.Builder
    {
        private final CompressionMetadata metadata;

        public Builder(ICompressedFile file)
        {
            super(file.channel());
            this.metadata = applyMetadata(file.getMetadata());
            this.regions = file.regions();
        }

        public Builder(ChannelProxy channel, CompressionMetadata metadata)
        {
            super(channel);
            this.metadata = applyMetadata(metadata);
        }

        private CompressionMetadata applyMetadata(CompressionMetadata metadata)
        {
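            // size the reader's buffer to exactly one uncompressed chunk, using the
            // buffer type (on-heap vs direct) the compressor prefers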
            this.overrideLength = metadata.compressedFileLength;
            this.bufferSize = metadata.chunkLength();
            this.bufferType = metadata.compressor().preferredBufferType();

            assert Integer.bitCount(this.bufferSize) == 1; // must be a power of two

            return metadata;
        }

        @Override
        protected ByteBuffer createBuffer()
        {
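            // start empty (limit 0) so the first read forces a reBuffer()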
            buffer = allocateBuffer(bufferSize, bufferType);
            buffer.limit(0);
            return buffer;
        }

        @Override
        public RandomAccessReader build()
        {
            return new CompressedRandomAccessReader(this);
        }
    }
}
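
For reference, a minimal usage sketch (not part of the file above): it opens a compressed sstable data file and reads a few bytes through the reader. The path is hypothetical, and the ChannelProxy(File) constructor and CompressionMetadata.create(String) factory are assumed to match the Cassandra 3.x API this class ships with.

import java.io.File;

import org.apache.cassandra.io.compress.CompressedRandomAccessReader;
import org.apache.cassandra.io.compress.CompressionMetadata;
import org.apache.cassandra.io.util.ChannelProxy;
import org.apache.cassandra.io.util.RandomAccessReader;

public class CrarExample
{
    public static void main(String[] args) throws Exception
    {
        // hypothetical data file; its -CompressionInfo.db component must exist alongside it
        String dataPath = "/var/lib/cassandra/data/ks/tbl/ma-1-big-Data.db";

        // reads chunk offsets and compression parameters from the CompressionInfo component
        CompressionMetadata metadata = CompressionMetadata.create(dataPath);

        try (ChannelProxy channel = new ChannelProxy(new File(dataPath));
             RandomAccessReader reader = new CompressedRandomAccessReader.Builder(channel, metadata).build())
        {
            reader.seek(0);               // position in the uncompressed stream
            byte[] header = new byte[16];
            reader.readFully(header);     // transparently uncompresses the first chunk
        }
    }
}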