org.apache.cassandra.io.sstable.metadata.MetadataCollector

The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.io.sstable.metadata;

import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;
import java.util.UUID;

import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
import com.clearspring.analytics.stream.cardinality.ICardinality;
import org.apache.cassandra.db.Clustering;
import org.apache.cassandra.db.ClusteringBound;
import org.apache.cassandra.db.ClusteringBoundOrBoundary;
import org.apache.cassandra.db.ClusteringComparator;
import org.apache.cassandra.db.ClusteringPrefix;
import org.apache.cassandra.db.DeletionTime;
import org.apache.cassandra.db.LivenessInfo;
import org.apache.cassandra.db.SerializationHeader;
import org.apache.cassandra.db.Slice;
import org.apache.cassandra.db.commitlog.CommitLogPosition;
import org.apache.cassandra.db.commitlog.IntervalSet;
import org.apache.cassandra.db.partitions.PartitionStatisticsCollector;
import org.apache.cassandra.db.rows.Cell;
import org.apache.cassandra.db.rows.Unfiltered;
import org.apache.cassandra.io.sstable.SSTable;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.service.ActiveRepairService;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.EstimatedHistogram;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.MurmurHash;
import org.apache.cassandra.utils.TimeUUID;
import org.apache.cassandra.utils.streamhist.StreamingTombstoneHistogramBuilder;
import org.apache.cassandra.utils.streamhist.TombstoneHistogram;

public class MetadataCollector implements PartitionStatisticsCollector
{
    public static final double NO_COMPRESSION_RATIO = -1.0;

    private long currentPartitionCells = 0;

    static EstimatedHistogram defaultCellPerPartitionCountHistogram()
    {
        // EH of 118 can track a max value of 4139110981, i.e., > 4B cells
        return new EstimatedHistogram(118);
    }

    static EstimatedHistogram defaultPartitionSizeHistogram()
    {
        // EH of 155 can track a max value of 3520571548412 i.e. 3.5TB
        return new EstimatedHistogram(155);
    }
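    // Sanity check for the two bucket counts above: EstimatedHistogram offsets grow by
    // roughly 20% per bucket, so the trackable maximum grows geometrically with the
    // bucket count. The 37 extra buckets between 118 and 155 multiply the maximum by
    // about 1.2^37 ~= 851, matching the ratio between 3520571548412 and 4139110981 (~850.6).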

    static TombstoneHistogram defaultTombstoneDropTimeHistogram()
    {
        return TombstoneHistogram.createDefault();
    }

    public static StatsMetadata defaultStatsMetadata()
    {
        return new StatsMetadata(defaultPartitionSizeHistogram(),
                                 defaultCellPerPartitionCountHistogram(),
                                 IntervalSet.empty(),
                                 Long.MIN_VALUE,                         // min timestamp
                                 Long.MAX_VALUE,                         // max timestamp
                                 Integer.MAX_VALUE,                      // min local deletion time
                                 Integer.MAX_VALUE,                      // max local deletion time
                                 0,                                      // min TTL
                                 Integer.MAX_VALUE,                      // max TTL
                                 NO_COMPRESSION_RATIO,
                                 defaultTombstoneDropTimeHistogram(),
                                 0,                                      // sstable level
                                 Collections.emptyList(),                // clustering types
                                 Slice.ALL,                              // covered clustering
                                 true,                                   // has legacy counter shards
                                 ActiveRepairService.UNREPAIRED_SSTABLE, // repaired at
                                 -1,                                     // total columns set
                                 -1,                                     // total rows
                                 Double.NaN,                             // token space coverage
                                 null,                                   // originating host id
                                 null,                                   // pending repair
                                 false,                                  // is transient
                                 true,                                   // has partition-level deletions
                                 ByteBufferUtil.EMPTY_BYTE_BUFFER,       // first key
                                 ByteBufferUtil.EMPTY_BYTE_BUFFER);      // last key
    }

    protected EstimatedHistogram estimatedPartitionSize = defaultPartitionSizeHistogram();
    // TODO: count the number of rows per partition (either alongside the number of cells, or instead of it)
    protected EstimatedHistogram estimatedCellPerPartitionCount = defaultCellPerPartitionCountHistogram();
    protected IntervalSet<CommitLogPosition> commitLogIntervals = IntervalSet.empty();
    protected final MinMaxLongTracker timestampTracker = new MinMaxLongTracker();
    protected final MinMaxLongTracker localDeletionTimeTracker = new MinMaxLongTracker(Cell.NO_DELETION_TIME, Cell.NO_DELETION_TIME);
    protected final MinMaxIntTracker ttlTracker = new MinMaxIntTracker(Cell.NO_TTL, Cell.NO_TTL);
    protected double compressionRatio = NO_COMPRESSION_RATIO;
    protected StreamingTombstoneHistogramBuilder estimatedTombstoneDropTime = new StreamingTombstoneHistogramBuilder(SSTable.TOMBSTONE_HISTOGRAM_BIN_SIZE, SSTable.TOMBSTONE_HISTOGRAM_SPOOL_SIZE, SSTable.TOMBSTONE_HISTOGRAM_TTL_ROUND_SECONDS);
    protected int sstableLevel;

    /**
     * The smallest clustering prefix for any {@link Unfiltered} in the sstable.
     *
     * <p>This is always either a Clustering, or a start bound (since for any end range tombstone bound, there should
     * be a corresponding start bound that is smaller).
     */
    private ClusteringPrefix<?> minClustering = ClusteringBound.MAX_START;

    /**
     * The largest clustering prefix for any {@link Unfiltered} in the sstable.
     *
     * <p>This is always either a Clustering, or an end bound (since for any start range tombstone bound, there should
     * be a corresponding end bound that is bigger).
     */
    private ClusteringPrefix<?> maxClustering = ClusteringBound.MIN_END;
    protected boolean hasLegacyCounterShards = false;
    private boolean hasPartitionLevelDeletions = false;
    protected long totalColumnsSet;
    protected long totalRows;
    public int totalTombstones;
    protected double tokenSpaceCoverage = Double.NaN;

    /**
     * The default cardinality estimation method is HyperLogLog++.
     * The parameters here (p=13, sp=25) should give a reasonable estimate
     * while lowering the bytes required to hold the information.
     * See CASSANDRA-5906 for details.
     */
    protected ICardinality cardinality = new HyperLogLogPlus(13, 25);

    private final ClusteringComparator comparator;
    private final long nowInSec = FBUtilities.nowInSeconds();
    private final UUID originatingHostId;

    public MetadataCollector(ClusteringComparator comparator)
    {
        this(comparator, StorageService.instance.getLocalHostUUID());
    }

    public MetadataCollector(ClusteringComparator comparator, UUID originatingHostId)
    {
        this.comparator = comparator;
        this.originatingHostId = originatingHostId;
    }

    public MetadataCollector(Iterable<SSTableReader> sstables, ClusteringComparator comparator)
    {
        this(comparator);

        IntervalSet.Builder<CommitLogPosition> intervals = new IntervalSet.Builder<>();
        if (originatingHostId != null)
        {
            for (SSTableReader sstable : sstables)
            {
                if (originatingHostId.equals(sstable.getSSTableMetadata().originatingHostId))
                    intervals.addAll(sstable.getSSTableMetadata().commitLogIntervals);
            }
        }
        commitLogIntervals(intervals.build());
    }

    public MetadataCollector addKey(ByteBuffer key)
    {
        long hashed = MurmurHash.hash2_64(key, key.position(), key.remaining(), 0);
        cardinality.offerHashed(hashed);
        totalTombstones = 0;
        return this;
    }

    public MetadataCollector addPartitionSizeInBytes(long partitionSize)
    {
        estimatedPartitionSize.add(partitionSize);
        return this;
    }

    public MetadataCollector addCellPerPartitionCount(long cellCount)
    {
        estimatedCellPerPartitionCount.add(cellCount);
        return this;
    }

    public MetadataCollector addCellPerPartitionCount()
    {
        estimatedCellPerPartitionCount.add(currentPartitionCells);
        currentPartitionCells = 0;
        return this;
    }

    /**
     * Ratio is compressed/uncompressed; if you have 1.x then compression isn't helping.
     */
    public MetadataCollector addCompressionRatio(long compressed, long uncompressed)
    {
        compressionRatio = (double) compressed / uncompressed;
        return this;
    }
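    // For example, addCompressionRatio(40, 100) records a ratio of 0.4 (the data
    // compressed to 40% of its original size); any ratio of 1.0 or more means
    // compression made the data no smaller.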
    public void update(LivenessInfo newInfo)
    {
        if (newInfo.isEmpty())
            return;

        updateTimestamp(newInfo.timestamp());
        updateTTL(newInfo.ttl());
        updateLocalDeletionTime(newInfo.localExpirationTime());
        if (!newInfo.isLive(nowInSec))
            updateTombstoneCount();
    }

    public void update(Cell cell)
    {
        ++currentPartitionCells;
        updateTimestamp(cell.timestamp());
        updateTTL(cell.ttl());
        updateLocalDeletionTime(cell.localDeletionTime());
        if (!cell.isLive(nowInSec))
            updateTombstoneCount();
    }

    public void updatePartitionDeletion(DeletionTime dt)
    {
        if (!dt.isLive())
            hasPartitionLevelDeletions = true;
        update(dt);
    }

    public void update(DeletionTime dt)
    {
        if (!dt.isLive())
        {
            updateTimestamp(dt.markedForDeleteAt());
            updateLocalDeletionTime(dt.localDeletionTime());
            updateTombstoneCount();
        }
    }

    public void updateColumnSetPerRow(long columnSetInRow)
    {
        totalColumnsSet += columnSetInRow;
        ++totalRows;
    }

    private void updateTimestamp(long newTimestamp)
    {
        timestampTracker.update(newTimestamp);
    }

    private void updateLocalDeletionTime(long newLocalDeletionTime)
    {
        localDeletionTimeTracker.update(newLocalDeletionTime);
        if (newLocalDeletionTime != Cell.NO_DELETION_TIME)
            estimatedTombstoneDropTime.update(newLocalDeletionTime);
    }

    private void updateTombstoneCount()
    {
        ++totalTombstones;
    }

    private void updateTTL(int newTTL)
    {
        ttlTracker.update(newTTL);
    }

    public MetadataCollector commitLogIntervals(IntervalSet<CommitLogPosition> commitLogIntervals)
    {
        this.commitLogIntervals = commitLogIntervals;
        return this;
    }

    public MetadataCollector sstableLevel(int sstableLevel)
    {
        this.sstableLevel = sstableLevel;
        return this;
    }

    public MetadataCollector tokenSpaceCoverage(double coverage)
    {
        tokenSpaceCoverage = coverage;
        return this;
    }

    public void updateClusteringValues(Clustering<?> clustering)
    {
        if (clustering == Clustering.STATIC_CLUSTERING)
            return;

        // In case of a monotonically growing stream of clusterings, we will usually require only one comparison,
        // because if we detected that X is greater than the current MAX, it cannot be lower than the current MIN
        // at the same time. The only case where we need to update MIN after MAX was updated is when MIN has not
        // yet been initialized and still points at ClusteringBound.MAX_START.
        if (comparator.compare(clustering, maxClustering) > 0)
        {
            maxClustering = clustering;
            if (minClustering == ClusteringBound.MAX_START)
                minClustering = clustering;
        }
        else if (comparator.compare(clustering, minClustering) < 0)
        {
            minClustering = clustering;
        }
    }
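    // Example of the fast path above: for clusterings arriving in sorted order, say (1), (2), (3),
    // the first value seeds both MIN and MAX (MIN still points at ClusteringBound.MAX_START), and
    // each later value only pays the single "greater than MAX" comparison.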
    public void updateClusteringValuesByBoundOrBoundary(ClusteringBoundOrBoundary<?> clusteringBoundOrBoundary)
    {
        // In an sstable, every opening marker will be closed, so the start of a range tombstone marker will never be
        // the maxClustering (the corresponding close might be, though) and there is no point in doing the comparison
        // (and vice-versa for the close). By the same reasoning, a boundary will never be either the min or max
        // clustering, and we can save on comparisons.
        if (clusteringBoundOrBoundary.isBoundary())
            return;

        // see the comment in updateClusteringValues(Clustering)
        if (comparator.compare(clusteringBoundOrBoundary, maxClustering) > 0)
        {
            if (clusteringBoundOrBoundary.kind().isEnd())
                maxClustering = clusteringBoundOrBoundary;
            // note that since we excluded boundaries above, there is no way that the provided clustering prefix is
            // a start and an end at the same time
            else if (minClustering == ClusteringBound.MAX_START)
                minClustering = clusteringBoundOrBoundary;
        }
        else if (comparator.compare(clusteringBoundOrBoundary, minClustering) < 0)
        {
            if (clusteringBoundOrBoundary.kind().isStart())
                minClustering = clusteringBoundOrBoundary;
            else if (maxClustering == ClusteringBound.MIN_END)
                maxClustering = clusteringBoundOrBoundary;
        }
    }

    public void updateHasLegacyCounterShards(boolean hasLegacyCounterShards)
    {
        this.hasLegacyCounterShards = this.hasLegacyCounterShards || hasLegacyCounterShards;
    }

    public Map<MetadataType, MetadataComponent> finalizeMetadata(String partitioner,
                                                                 double bloomFilterFPChance,
                                                                 long repairedAt,
                                                                 TimeUUID pendingRepair,
                                                                 boolean isTransient,
                                                                 SerializationHeader header,
                                                                 ByteBuffer firstKey,
                                                                 ByteBuffer lastKey)
    {
        assert minClustering.kind() == ClusteringPrefix.Kind.CLUSTERING || minClustering.kind().isStart();
        assert maxClustering.kind() == ClusteringPrefix.Kind.CLUSTERING || maxClustering.kind().isEnd();

        Map<MetadataType, MetadataComponent> components = new EnumMap<>(MetadataType.class);
        components.put(MetadataType.VALIDATION, new ValidationMetadata(partitioner, bloomFilterFPChance));
        components.put(MetadataType.STATS, new StatsMetadata(estimatedPartitionSize,
                                                             estimatedCellPerPartitionCount,
                                                             commitLogIntervals,
                                                             timestampTracker.min(),
                                                             timestampTracker.max(),
                                                             localDeletionTimeTracker.min(),
                                                             localDeletionTimeTracker.max(),
                                                             ttlTracker.min(),
                                                             ttlTracker.max(),
                                                             compressionRatio,
                                                             estimatedTombstoneDropTime.build(),
                                                             sstableLevel,
                                                             comparator.subtypes(),
                                                             Slice.make(minClustering.retainable().asStartBound(), maxClustering.retainable().asEndBound()),
                                                             hasLegacyCounterShards,
                                                             repairedAt,
                                                             totalColumnsSet,
                                                             totalRows,
                                                             tokenSpaceCoverage,
                                                             originatingHostId,
                                                             pendingRepair,
                                                             isTransient,
                                                             hasPartitionLevelDeletions,
                                                             firstKey,
                                                             lastKey));
        components.put(MetadataType.COMPACTION, new CompactionMetadata(cardinality));
        components.put(MetadataType.HEADER, header.toComponent());
        return components;
    }

    /**
     * Release large memory objects while keeping metrics intact.
     */
    public void release()
    {
        estimatedTombstoneDropTime.releaseBuffers();
    }
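    // The two trackers below share a pattern: they report the configured "unset"
    // defaults until update() is first called, after which they return the observed
    // minimum and maximum.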
    public static class MinMaxLongTracker
    {
        private final long defaultMin;
        private final long defaultMax;

        private boolean isSet = false;
        private long min;
        private long max;

        public MinMaxLongTracker()
        {
            this(Long.MIN_VALUE, Long.MAX_VALUE);
        }

        public MinMaxLongTracker(long defaultMin, long defaultMax)
        {
            this.defaultMin = defaultMin;
            this.defaultMax = defaultMax;
        }

        public void update(long value)
        {
            if (!isSet)
            {
                min = max = value;
                isSet = true;
            }
            else
            {
                if (value < min)
                    min = value;
                if (value > max)
                    max = value;
            }
        }

        public long min()
        {
            return isSet ? min : defaultMin;
        }

        public long max()
        {
            return isSet ? max : defaultMax;
        }
    }

    public static class MinMaxIntTracker
    {
        private final int defaultMin;
        private final int defaultMax;

        private boolean isSet = false;
        private int min;
        private int max;

        public MinMaxIntTracker()
        {
            this(Integer.MIN_VALUE, Integer.MAX_VALUE);
        }

        public MinMaxIntTracker(int defaultMin, int defaultMax)
        {
            this.defaultMin = defaultMin;
            this.defaultMax = defaultMax;
        }

        public void update(int value)
        {
            if (!isSet)
            {
                min = max = value;
                isSet = true;
            }
            else
            {
                if (value < min)
                    min = value;
                if (value > max)
                    max = value;
            }
        }

        public int min()
        {
            return isSet ? min : defaultMin;
        }

        public int max()
        {
            return isSet ? max : defaultMax;
        }
    }
}
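The HyperLogLog++ sketch fed by addKey() ends up in CompactionMetadata, where it is used to estimate the number of distinct partition keys across sstables without scanning them (see CASSANDRA-5906). Below is a minimal, self-contained sketch of the estimator's behavior, using the same stream-lib HyperLogLogPlus(13, 25) construction as the collector; the class name and key-generation loop are illustrative only, not part of Cassandra:

import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
import com.clearspring.analytics.stream.cardinality.ICardinality;

// Hypothetical demo class, not part of the Cassandra sources.
public class CardinalityDemo
{
    public static void main(String[] args)
    {
        // Same sketch parameters as MetadataCollector: p=13 (2^13 registers), sp=25 (sparse precision).
        ICardinality cardinality = new HyperLogLogPlus(13, 25);

        // MetadataCollector offers a 64-bit MurmurHash of each partition key via
        // offerHashed(); offering the raw object lets the library hash it instead.
        for (int i = 0; i < 100_000; i++)
            cardinality.offer("key-" + i);

        // With p=13 the standard error is about 1.04 / sqrt(2^13) ~= 1.15%,
        // so this should print a value close to 100000.
        System.out.println("estimated distinct keys: " + cardinality.cardinality());
    }
}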




