// NOTE(review): the following lines were scraped from the hosting website and are not
// valid Java; preserved as a comment so the file compiles.
// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
// org.apache.cassandra.io.sstable.SSTableMetadata Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.io.sstable;

import java.io.*;
import java.nio.ByteBuffer;
import java.util.*;

import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.Pair;
import org.apache.cassandra.utils.StreamingHistogram;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.db.commitlog.ReplayPosition;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.utils.EstimatedHistogram;

/**
 * Metadata for a SSTable.
 * Metadata includes:
 *  - estimated row size histogram
 *  - estimated column count histogram
 *  - replay position
 *  - max column timestamp
 *  - max local deletion time
 *  - bloom filter fp chance
 *  - compression ratio
 *  - partitioner
 *  - generations of sstables from which this sstable was compacted, if any
 *  - tombstone drop time histogram
 *
 * An SSTableMetadata should be instantiated via the Collector, openFromDescriptor()
 * or createDefaultInstance()
 */
public class SSTableMetadata
{
    /**
     * Sentinel meaning "no bloom filter false-positive chance was recorded".
     * NOTE(review): "FLITER" is a historical typo for "FILTER"; the constant is public
     * API, so renaming it would break external callers.
     */
    public static final double NO_BLOOM_FLITER_FP_CHANCE = -1.0;
    /** Sentinel meaning "no compression ratio was recorded". */
    public static final double NO_COMPRESSION_RATIO = -1.0;
    /** Shared, stateless serializer for the sstable stats component. */
    public static final SSTableMetadataSerializer serializer = new SSTableMetadataSerializer();

    public final EstimatedHistogram estimatedRowSize;
    public final EstimatedHistogram estimatedColumnCount;
    public final ReplayPosition replayPosition;
    public final long minTimestamp;
    public final long maxTimestamp;
    public final int maxLocalDeletionTime;
    public final double bloomFilterFPChance;
    /** compressed/uncompressed size ratio, or NO_COMPRESSION_RATIO when never set. */
    public final double compressionRatio;
    /** Name of the partitioner the sstable was written with. */
    public final String partitioner;
    /** Histogram of tombstone drop times; see getDroppableTombstonesBefore(). */
    public final StreamingHistogram estimatedTombstoneDropTime;
    public final int sstableLevel;
    // Generic parameters restored (stripped by extraction): the serializer reads and
    // writes these lists as short-length-prefixed ByteBuffers.
    public final List<ByteBuffer> maxColumnNames;
    public final List<ByteBuffer> minColumnNames;
    /**
     * Creates the "no stats available" default instance, used when an sstable has no
     * stats component on disk (see SSTableMetadataSerializer.deserialize). The
     * timestamp/deletion-time bounds are the reverse of the Collector's starting
     * sentinels: nothing is known, so the widest possible bounds are reported.
     */
    private SSTableMetadata()
    {
        this(defaultRowSizeHistogram(),
             defaultColumnCountHistogram(),
             ReplayPosition.NONE,
             Long.MIN_VALUE,    // minTimestamp: unknown
             Long.MAX_VALUE,    // maxTimestamp: unknown
             Integer.MAX_VALUE, // maxLocalDeletionTime: unknown
             NO_BLOOM_FLITER_FP_CHANCE,
             NO_COMPRESSION_RATIO,
             null,              // partitioner: unknown
             defaultTombstoneDropTimeHistogram(),
             0,                 // sstableLevel
             Collections.emptyList(),  // minColumnNames
             Collections.emptyList()); // maxColumnNames
    }

    /**
     * Canonical constructor: every public field is assigned directly from the
     * corresponding argument. No defensive copies are made, so callers must not
     * mutate the histograms or lists after construction.
     * (Generic parameters on the two column-name lists restored; they were stripped
     * during extraction.)
     */
    private SSTableMetadata(EstimatedHistogram rowSizes,
                            EstimatedHistogram columnCounts,
                            ReplayPosition replayPosition,
                            long minTimestamp,
                            long maxTimestamp,
                            int maxLocalDeletionTime,
                            double bloomFilterFPChance,
                            double compressionRatio,
                            String partitioner,
                            StreamingHistogram estimatedTombstoneDropTime,
                            int sstableLevel,
                            List<ByteBuffer> minColumnNames,
                            List<ByteBuffer> maxColumnNames)
    {
        this.estimatedRowSize = rowSizes;
        this.estimatedColumnCount = columnCounts;
        this.replayPosition = replayPosition;
        this.minTimestamp = minTimestamp;
        this.maxTimestamp = maxTimestamp;
        this.maxLocalDeletionTime = maxLocalDeletionTime;
        this.bloomFilterFPChance = bloomFilterFPChance;
        this.compressionRatio = compressionRatio;
        this.partitioner = partitioner;
        this.estimatedTombstoneDropTime = estimatedTombstoneDropTime;
        this.sstableLevel = sstableLevel;
        this.minColumnNames = minColumnNames;
        this.maxColumnNames = maxColumnNames;
    }

    /**
     * Creates an empty statistics collector for writing a new sstable.
     *
     * @param columnNameComparator comparator used when merging min/max column names
     * @return a fresh Collector
     */
    public static Collector createCollector(AbstractType columnNameComparator)
    {
        Collector collector = new Collector(columnNameComparator);
        return collector;
    }

    /**
     * Creates a collector seeded for a compaction of the given sstables: the replay
     * position is derived from the inputs, the target level is recorded, and the
     * ancestor set is populated with each input's generation plus any of its own
     * ancestors whose data file still exists on disk.
     * (Element type of {@code sstables} restored; it was stripped during extraction —
     * the for-each below requires {@code Collection<SSTableReader>}.)
     *
     * @param sstables the sstables being compacted
     * @param columnNameComparator comparator used when merging min/max column names
     * @param level the level of the sstable being written
     * @return a seeded Collector
     */
    public static Collector createCollector(Collection<SSTableReader> sstables, AbstractType columnNameComparator, int level)
    {
        Collector collector = new Collector(columnNameComparator);

        collector.replayPosition(ReplayPosition.getReplayPosition(sstables));
        collector.sstableLevel(level);
        // Record the generations this sstable descends from: each input sstable
        // and any of its ancestors that are still live (data file present).
        for (SSTableReader sstable : sstables)
        {
            collector.addAncestor(sstable.descriptor.generation);
            for (Integer i : sstable.getAncestors())
            {
                if (new File(sstable.descriptor.withGeneration(i).filenameFor(Component.DATA)).exists())
                    collector.addAncestor(i);
            }
        }

        return collector;
    }

    /**
     * Used when updating sstablemetadata files with an sstable level
     * @param metadata the existing metadata to copy
     * @param sstableLevel the new level to record
     * @return a copy of {@code metadata}, identical except for the given sstable level
     */
    @Deprecated
    public static SSTableMetadata copyWithNewSSTableLevel(SSTableMetadata metadata, int sstableLevel)
    {
        return new SSTableMetadata(metadata.estimatedRowSize,
                                   metadata.estimatedColumnCount,
                                   metadata.replayPosition,
                                   metadata.minTimestamp,
                                   metadata.maxTimestamp,
                                   metadata.maxLocalDeletionTime,
                                   metadata.bloomFilterFPChance,
                                   metadata.compressionRatio,
                                   metadata.partitioner,
                                   metadata.estimatedTombstoneDropTime,
                                   sstableLevel, // the only field that changes
                                   metadata.minColumnNames,
                                   metadata.maxColumnNames);

    }

    static EstimatedHistogram defaultColumnCountHistogram()
    {
        // 114 buckets are enough to track counts up to 2395318855 (> 2B columns).
        final int bucketCount = 114;
        return new EstimatedHistogram(bucketCount);
    }

    static EstimatedHistogram defaultRowSizeHistogram()
    {
        // 150 buckets cover row sizes up to 1697806495183 bytes (> 1.5PB).
        final int bucketCount = 150;
        return new EstimatedHistogram(bucketCount);
    }

    static StreamingHistogram defaultTombstoneDropTimeHistogram()
    {
        // Fresh tombstone-drop-time histogram with the standard bin size.
        StreamingHistogram histogram = new StreamingHistogram(SSTable.TOMBSTONE_HISTOGRAM_BIN_SIZE);
        return histogram;
    }

    /**
     * Estimates the fraction of this sstable's columns that are droppable tombstones.
     *
     * @param gcBefore the gc time
     * @return estimated droppable tombstone ratio at given gcBefore time, or 0 when
     *         the column-count histogram is empty
     */
    public double getEstimatedDroppableTombstoneRatio(int gcBefore)
    {
        // Approximate total column count from the histogram's mean and sample count.
        long columns = this.estimatedColumnCount.mean() * this.estimatedColumnCount.count();
        if (columns <= 0)
            return 0.0;
        return getDroppableTombstonesBefore(gcBefore) / columns;
    }

    /**
     * Get the amount of droppable tombstones: the portion of the tombstone
     * drop-time histogram falling before the given time.
     *
     * @param gcBefore the gc time
     * @return amount of droppable tombstones
     */
    public double getDroppableTombstonesBefore(int gcBefore)
    {
        double droppable = estimatedTombstoneDropTime.sum(gcBefore);
        return droppable;
    }

    /**
     * Mutable accumulator for statistics gathered while an sstable is written; obtain
     * via SSTableMetadata.createCollector() and snapshot with finalizeMetadata().
     *
     * Note that the timestamp sentinels here are the reverse of SSTableMetadata's
     * defaults: minTimestamp starts at Long.MAX_VALUE and maxTimestamp at
     * Long.MIN_VALUE, so the first update always takes effect.
     *
     * (Generic parameters on the collection fields/params restored; they were
     * stripped during extraction.)
     */
    public static class Collector
    {
        protected EstimatedHistogram estimatedRowSize = defaultRowSizeHistogram();
        protected EstimatedHistogram estimatedColumnCount = defaultColumnCountHistogram();
        protected ReplayPosition replayPosition = ReplayPosition.NONE;
        protected long minTimestamp = Long.MAX_VALUE;
        protected long maxTimestamp = Long.MIN_VALUE;
        protected int maxLocalDeletionTime = Integer.MIN_VALUE;
        protected double compressionRatio = NO_COMPRESSION_RATIO;
        // Generations of the sstables this one is being compacted from, if any.
        protected Set<Integer> ancestors = new HashSet<Integer>();
        protected StreamingHistogram estimatedTombstoneDropTime = defaultTombstoneDropTimeHistogram();
        protected int sstableLevel;
        protected List<ByteBuffer> minColumnNames = Collections.emptyList();
        protected List<ByteBuffer> maxColumnNames = Collections.emptyList();
        // Comparator used to merge per-row min/max column names (kept raw, matching
        // the rest of this file's usage of AbstractType).
        private final AbstractType columnNameComparator;

        private Collector(AbstractType columnNameComparator)
        {
            this.columnNameComparator = columnNameComparator;
        }

        public void addRowSize(long rowSize)
        {
            estimatedRowSize.add(rowSize);
        }

        public void addColumnCount(long columnCount)
        {
            estimatedColumnCount.add(columnCount);
        }

        /** Folds a per-row tombstone drop-time histogram into the running one. */
        public void mergeTombstoneHistogram(StreamingHistogram histogram)
        {
            estimatedTombstoneDropTime.merge(histogram);
        }

        /**
         * Records the compression ratio, computed as compressed/uncompressed;
         * a value of 1.x or more means compression isn't helping.
         */
        public void addCompressionRatio(long compressed, long uncompressed)
        {
            compressionRatio = (double) compressed/uncompressed;
        }

        public void updateMinTimestamp(long potentialMin)
        {
            minTimestamp = Math.min(minTimestamp, potentialMin);
        }

        public void updateMaxTimestamp(long potentialMax)
        {
            maxTimestamp = Math.max(maxTimestamp, potentialMax);
        }

        public void updateMaxLocalDeletionTime(int maxLocalDeletionTime)
        {
            this.maxLocalDeletionTime = Math.max(this.maxLocalDeletionTime, maxLocalDeletionTime);
        }

        /**
         * Snapshots the collected statistics into an immutable SSTableMetadata.
         *
         * @param partitioner name of the partitioner the sstable was written with
         * @param bloomFilterFPChance the configured bloom filter false-positive chance
         * @return the finalized metadata
         */
        public SSTableMetadata finalizeMetadata(String partitioner, double bloomFilterFPChance)
        {
            return new SSTableMetadata(estimatedRowSize,
                                       estimatedColumnCount,
                                       replayPosition,
                                       minTimestamp,
                                       maxTimestamp,
                                       maxLocalDeletionTime,
                                       bloomFilterFPChance,
                                       compressionRatio,
                                       partitioner,
                                       estimatedTombstoneDropTime,
                                       sstableLevel,
                                       minColumnNames,
                                       maxColumnNames);
        }

        // Builder-style setters below return this for chaining.

        public Collector estimatedRowSize(EstimatedHistogram estimatedRowSize)
        {
            this.estimatedRowSize = estimatedRowSize;
            return this;
        }

        public Collector estimatedColumnCount(EstimatedHistogram estimatedColumnCount)
        {
            this.estimatedColumnCount = estimatedColumnCount;
            return this;
        }

        public Collector replayPosition(ReplayPosition replayPosition)
        {
            this.replayPosition = replayPosition;
            return this;
        }

        public Collector addAncestor(int generation)
        {
            this.ancestors.add(generation);
            return this;
        }

        /** Applies all statistics for one row of the given serialized size. */
        void update(long size, ColumnStats stats)
        {
            updateMinTimestamp(stats.minTimestamp);
            /*
             * The max timestamp is not always collected here (more precisely, row.maxTimestamp() may return Long.MIN_VALUE),
             * to avoid deserializing an EchoedRow.
             * This is the reason why it is collected first when calling ColumnFamilyStore.createCompactionWriter
             * However, for old sstables without timestamp, we still want to update the timestamp (and we know
             * that in this case we will not use EchoedRow, since CompactionControler.needsDeserialize() will be true).
            */
            updateMaxTimestamp(stats.maxTimestamp);
            updateMaxLocalDeletionTime(stats.maxLocalDeletionTime);
            addRowSize(size);
            addColumnCount(stats.columnCount);
            mergeTombstoneHistogram(stats.tombstoneHistogram);
            updateMinColumnNames(stats.minColumnNames);
            updateMaxColumnNames(stats.maxColumnNames);
        }

        public Collector sstableLevel(int sstableLevel)
        {
            this.sstableLevel = sstableLevel;
            return this;
        }

        public Collector updateMinColumnNames(List<ByteBuffer> minColumnNames)
        {
            if (minColumnNames.size() > 0)
                this.minColumnNames = ColumnNameHelper.mergeMin(this.minColumnNames, minColumnNames, columnNameComparator);
            return this;
        }

        public Collector updateMaxColumnNames(List<ByteBuffer> maxColumnNames)
        {
            if (maxColumnNames.size() > 0)
                this.maxColumnNames = ColumnNameHelper.mergeMax(this.maxColumnNames, maxColumnNames, columnNameComparator);
            return this;
        }
    }

    public static class SSTableMetadataSerializer
    {
        private static final Logger logger = LoggerFactory.getLogger(SSTableMetadataSerializer.class);

        public void serialize(SSTableMetadata sstableStats, Set ancestors, DataOutput out) throws IOException
        {
            assert sstableStats.partitioner != null;

            EstimatedHistogram.serializer.serialize(sstableStats.estimatedRowSize, out);
            EstimatedHistogram.serializer.serialize(sstableStats.estimatedColumnCount, out);
            ReplayPosition.serializer.serialize(sstableStats.replayPosition, out);
            out.writeLong(sstableStats.minTimestamp);
            out.writeLong(sstableStats.maxTimestamp);
            out.writeInt(sstableStats.maxLocalDeletionTime);
            out.writeDouble(sstableStats.bloomFilterFPChance);
            out.writeDouble(sstableStats.compressionRatio);
            out.writeUTF(sstableStats.partitioner);
            out.writeInt(ancestors.size());
            for (Integer g : ancestors)
                out.writeInt(g);
            StreamingHistogram.serializer.serialize(sstableStats.estimatedTombstoneDropTime, out);
            out.writeInt(sstableStats.sstableLevel);
            serializeMinMaxColumnNames(sstableStats.minColumnNames, sstableStats.maxColumnNames, out);
        }

        private void serializeMinMaxColumnNames(List minColNames, List maxColNames, DataOutput out) throws IOException
        {
            out.writeInt(minColNames.size());
            for (ByteBuffer columnName : minColNames)
                ByteBufferUtil.writeWithShortLength(columnName, out);
            out.writeInt(maxColNames.size());
            for (ByteBuffer columnName : maxColNames)
                ByteBufferUtil.writeWithShortLength(columnName, out);
        }
        /**
         * Used to serialize to an old version - needed to be able to update sstable level without a full compaction.
         *
         * @deprecated will be removed when it is assumed that the minimum upgrade-from-version is the version that this
         * patch made it into
         *
         * @param sstableStats
         * @param legacyDesc
         * @param out
         * @throws IOException
         */
        @Deprecated
        public void legacySerialize(SSTableMetadata sstableStats, Set ancestors, Descriptor legacyDesc, DataOutput out) throws IOException
        {
            EstimatedHistogram.serializer.serialize(sstableStats.estimatedRowSize, out);
            EstimatedHistogram.serializer.serialize(sstableStats.estimatedColumnCount, out);
            ReplayPosition.serializer.serialize(sstableStats.replayPosition, out);
            out.writeLong(sstableStats.minTimestamp);
            out.writeLong(sstableStats.maxTimestamp);
            if (legacyDesc.version.tracksMaxLocalDeletionTime)
                out.writeInt(sstableStats.maxLocalDeletionTime);
            if (legacyDesc.version.hasBloomFilterFPChance)
                out.writeDouble(sstableStats.bloomFilterFPChance);
            out.writeDouble(sstableStats.compressionRatio);
            out.writeUTF(sstableStats.partitioner);
            out.writeInt(ancestors.size());
            for (Integer g : ancestors)
                out.writeInt(g);
            StreamingHistogram.serializer.serialize(sstableStats.estimatedTombstoneDropTime, out);
            out.writeInt(sstableStats.sstableLevel);
            if (legacyDesc.version.tracksMaxMinColumnNames)
                serializeMinMaxColumnNames(sstableStats.minColumnNames, sstableStats.maxColumnNames, out);
        }

        /**
         * deserializes the metadata
         *
         * returns a pair containing the part of the metadata meant to be kept-in memory and the part
         * that should not.
         *
         * @param descriptor the descriptor
         * @return a pair containing data that needs to be in memory and data that is potentially big and is not needed
         *         in memory
         * @throws IOException
         */
        public Pair> deserialize(Descriptor descriptor) throws IOException
        {
            return deserialize(descriptor, true);
        }

        public Pair> deserialize(Descriptor descriptor, boolean loadSSTableLevel) throws IOException
        {
            logger.debug("Load metadata for {}", descriptor);
            File statsFile = new File(descriptor.filenameFor(SSTable.COMPONENT_STATS));
            if (!statsFile.exists())
            {
                logger.debug("No sstable stats for {}", descriptor);
                return Pair.create(new SSTableMetadata(), Collections.emptySet());
            }

            DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(statsFile)));
            try
            {
                return deserialize(in, descriptor, loadSSTableLevel);
            }
            finally
            {
                FileUtils.closeQuietly(in);
            }
        }
        public Pair> deserialize(DataInputStream in, Descriptor desc) throws IOException
        {
            return deserialize(in, desc, true);
        }

        public Pair> deserialize(DataInputStream in, Descriptor desc, boolean loadSSTableLevel) throws IOException
        {
            EstimatedHistogram rowSizes = EstimatedHistogram.serializer.deserialize(in);
            EstimatedHistogram columnCounts = EstimatedHistogram.serializer.deserialize(in);
            ReplayPosition replayPosition = ReplayPosition.serializer.deserialize(in);
            long minTimestamp = in.readLong();
            long maxTimestamp = in.readLong();
            int maxLocalDeletionTime = desc.version.tracksMaxLocalDeletionTime ? in.readInt() : Integer.MAX_VALUE;
            double bloomFilterFPChance = desc.version.hasBloomFilterFPChance ? in.readDouble() : NO_BLOOM_FLITER_FP_CHANCE;
            double compressionRatio = in.readDouble();
            String partitioner = in.readUTF();
            int nbAncestors = in.readInt();
            Set ancestors = new HashSet(nbAncestors);
            for (int i = 0; i < nbAncestors; i++)
                ancestors.add(in.readInt());
            StreamingHistogram tombstoneHistogram = StreamingHistogram.serializer.deserialize(in);
            int sstableLevel = 0;

            if (loadSSTableLevel && in.available() > 0)
                sstableLevel = in.readInt();

            List minColumnNames;
            List maxColumnNames;
            if (desc.version.tracksMaxMinColumnNames)
            {
                int colCount = in.readInt();
                minColumnNames = new ArrayList(colCount);
                for (int i = 0; i < colCount; i++)
                {
                    minColumnNames.add(ByteBufferUtil.readWithShortLength(in));
                }
                colCount = in.readInt();
                maxColumnNames = new ArrayList(colCount);
                for (int i = 0; i < colCount; i++)
                {
                    maxColumnNames.add(ByteBufferUtil.readWithShortLength(in));
                }
            }
            else
            {
                minColumnNames = Collections.emptyList();
                maxColumnNames = Collections.emptyList();
            }
            return Pair.create(new SSTableMetadata(rowSizes,
                                       columnCounts,
                                       replayPosition,
                                       minTimestamp,
                                       maxTimestamp,
                                       maxLocalDeletionTime,
                                       bloomFilterFPChance,
                                       compressionRatio,
                                       partitioner,
                                       tombstoneHistogram,
                                       sstableLevel,
                                       minColumnNames,
                                       maxColumnNames), ancestors);
        }
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (site footer, kept as a comment)