All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.cassandra.io.sstable.metadata.MetadataCollector Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.io.sstable.metadata;

import java.io.File;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;

import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
import com.clearspring.analytics.stream.cardinality.ICardinality;
import org.apache.cassandra.db.commitlog.ReplayPosition;
import org.apache.cassandra.db.composites.CellNameType;
import org.apache.cassandra.io.sstable.ColumnNameHelper;
import org.apache.cassandra.io.sstable.ColumnStats;
import org.apache.cassandra.io.sstable.Component;
import org.apache.cassandra.io.sstable.SSTable;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.service.ActiveRepairService;
import org.apache.cassandra.utils.EstimatedHistogram;
import org.apache.cassandra.utils.MurmurHash;
import org.apache.cassandra.utils.StreamingHistogram;

public class MetadataCollector
{
    public static final double NO_COMPRESSION_RATIO = -1.0;

    static EstimatedHistogram defaultColumnCountHistogram()
    {
        // EH of 114 can track a max value of 2395318855, i.e., > 2B columns
        return new EstimatedHistogram(114);
    }

    static EstimatedHistogram defaultRowSizeHistogram()
    {
        // EH of 150 can track a max value of 1697806495183, i.e., > 1.5PB
        return new EstimatedHistogram(150);
    }

    static StreamingHistogram defaultTombstoneDropTimeHistogram()
    {
        return new StreamingHistogram(SSTable.TOMBSTONE_HISTOGRAM_BIN_SIZE);
    }

    public static StatsMetadata defaultStatsMetadata()
    {
        return new StatsMetadata(defaultRowSizeHistogram(),
                                 defaultColumnCountHistogram(),
                                 ReplayPosition.NONE,
                                 ReplayPosition.NONE,
                                 Long.MIN_VALUE,
                                 Long.MAX_VALUE,
                                 Integer.MAX_VALUE,
                                 NO_COMPRESSION_RATIO,
                                 defaultTombstoneDropTimeHistogram(),
                                 0,
                                 Collections.emptyList(),
                                 Collections.emptyList(),
                                 true,
                                 ActiveRepairService.UNREPAIRED_SSTABLE);
    }

    protected EstimatedHistogram estimatedRowSize = defaultRowSizeHistogram();
    protected EstimatedHistogram estimatedColumnCount = defaultColumnCountHistogram();
    protected ReplayPosition commitLogLowerBound = ReplayPosition.NONE;
    protected ReplayPosition commitLogUpperBound = ReplayPosition.NONE;
    protected long minTimestamp = Long.MAX_VALUE;
    protected long maxTimestamp = Long.MIN_VALUE;
    protected int maxLocalDeletionTime = Integer.MIN_VALUE;
    protected double compressionRatio = NO_COMPRESSION_RATIO;
    protected Set ancestors = new HashSet<>();
    protected StreamingHistogram estimatedTombstoneDropTime = defaultTombstoneDropTimeHistogram();
    protected int sstableLevel;
    protected List minColumnNames = Collections.emptyList();
    protected List maxColumnNames = Collections.emptyList();
    protected boolean hasLegacyCounterShards = false;

    /**
     * Default cardinality estimation method is to use HyperLogLog++.
     * Parameter here(p=13, sp=25) should give reasonable estimation
     * while lowering bytes required to hold information.
     * See CASSANDRA-5906 for detail.
     */
    protected ICardinality cardinality = new HyperLogLogPlus(13, 25);
    private final CellNameType columnNameComparator;

    public MetadataCollector(CellNameType columnNameComparator)
    {
        this.columnNameComparator = columnNameComparator;
    }

    public MetadataCollector(Iterable sstables, CellNameType columnNameComparator, int level, boolean skipAncestors)
    {
        this(columnNameComparator);

        ReplayPosition min = null, max = null;
        for (SSTableReader sstable : sstables)
        {
            if (min == null)
            {
                min = sstable.getSSTableMetadata().commitLogLowerBound;
                max = sstable.getSSTableMetadata().commitLogUpperBound;
            }
            else
            {
                min = Ordering.natural().min(min, sstable.getSSTableMetadata().commitLogLowerBound);
                max = Ordering.natural().max(max, sstable.getSSTableMetadata().commitLogUpperBound);
            }
        }

        commitLogLowerBound(min);
        commitLogUpperBound(max);
        sstableLevel(level);
        // Get the max timestamp of the precompacted sstables
        // and adds generation of live ancestors
        if (!skipAncestors)
        {
            for (SSTableReader sstable : sstables)
            {
                addAncestor(sstable.descriptor.generation);
                for (Integer i : sstable.getAncestors())
                    if (new File(sstable.descriptor.withGeneration(i).filenameFor(Component.DATA)).exists())
                        addAncestor(i);
            }
        }
    }

    public MetadataCollector(Iterable sstables, CellNameType columnNameComparator, int level)
    {
        this(sstables, columnNameComparator, level, false);
    }

    public MetadataCollector addKey(ByteBuffer key)
    {
        long hashed = MurmurHash.hash2_64(key, key.position(), key.remaining(), 0);
        cardinality.offerHashed(hashed);
        return this;
    }

    public MetadataCollector addRowSize(long rowSize)
    {
        estimatedRowSize.add(rowSize);
        return this;
    }

    public MetadataCollector addColumnCount(long columnCount)
    {
        estimatedColumnCount.add(columnCount);
        return this;
    }

    public MetadataCollector mergeTombstoneHistogram(StreamingHistogram histogram)
    {
        estimatedTombstoneDropTime.merge(histogram);
        return this;
    }

    /**
     * Ratio is compressed/uncompressed and it is
     * if you have 1.x then compression isn't helping
     */
    public MetadataCollector addCompressionRatio(long compressed, long uncompressed)
    {
        compressionRatio = (double) compressed/uncompressed;
        return this;
    }

    public MetadataCollector updateMinTimestamp(long potentialMin)
    {
        minTimestamp = Math.min(minTimestamp, potentialMin);
        return this;
    }

    public MetadataCollector updateMaxTimestamp(long potentialMax)
    {
        maxTimestamp = Math.max(maxTimestamp, potentialMax);
        return this;
    }

    public MetadataCollector updateMaxLocalDeletionTime(int maxLocalDeletionTime)
    {
        this.maxLocalDeletionTime = Math.max(this.maxLocalDeletionTime, maxLocalDeletionTime);
        return this;
    }

    public MetadataCollector estimatedRowSize(EstimatedHistogram estimatedRowSize)
    {
        this.estimatedRowSize = estimatedRowSize;
        return this;
    }

    public MetadataCollector estimatedColumnCount(EstimatedHistogram estimatedColumnCount)
    {
        this.estimatedColumnCount = estimatedColumnCount;
        return this;
    }

    public MetadataCollector commitLogLowerBound(ReplayPosition commitLogLowerBound)
    {
        this.commitLogLowerBound = commitLogLowerBound;
        return this;
    }

    public MetadataCollector commitLogUpperBound(ReplayPosition commitLogUpperBound)
    {
        this.commitLogUpperBound = commitLogUpperBound;
        return this;
    }

    public MetadataCollector addAncestor(int generation)
    {
        this.ancestors.add(generation);
        return this;
    }

    public MetadataCollector sstableLevel(int sstableLevel)
    {
        this.sstableLevel = sstableLevel;
        return this;
    }

    public MetadataCollector updateMinColumnNames(List minColumnNames)
    {
        if (minColumnNames.size() > 0)
            this.minColumnNames = ColumnNameHelper.mergeMin(this.minColumnNames, minColumnNames, columnNameComparator);
        return this;
    }

    public MetadataCollector updateMaxColumnNames(List maxColumnNames)
    {
        if (maxColumnNames.size() > 0)
            this.maxColumnNames = ColumnNameHelper.mergeMax(this.maxColumnNames, maxColumnNames, columnNameComparator);
        return this;
    }

    public MetadataCollector updateHasLegacyCounterShards(boolean hasLegacyCounterShards)
    {
        this.hasLegacyCounterShards = this.hasLegacyCounterShards || hasLegacyCounterShards;
        return this;
    }

    public MetadataCollector update(long rowSize, ColumnStats stats)
    {
        updateMinTimestamp(stats.minTimestamp);
        updateMaxTimestamp(stats.maxTimestamp);
        updateMaxLocalDeletionTime(stats.maxLocalDeletionTime);
        addRowSize(rowSize);
        addColumnCount(stats.columnCount);
        mergeTombstoneHistogram(stats.tombstoneHistogram);
        updateMinColumnNames(stats.minColumnNames);
        updateMaxColumnNames(stats.maxColumnNames);
        updateHasLegacyCounterShards(stats.hasLegacyCounterShards);
        return this;
    }

    public Map finalizeMetadata(String partitioner, double bloomFilterFPChance, long repairedAt)
    {
        Map components = Maps.newHashMap();
        components.put(MetadataType.VALIDATION, new ValidationMetadata(partitioner, bloomFilterFPChance));
        components.put(MetadataType.STATS, new StatsMetadata(estimatedRowSize,
                                                             estimatedColumnCount,
                                                             commitLogLowerBound,
                                                             commitLogUpperBound,
                                                             minTimestamp,
                                                             maxTimestamp,
                                                             maxLocalDeletionTime,
                                                             compressionRatio,
                                                             estimatedTombstoneDropTime,
                                                             sstableLevel,
                                                             ImmutableList.copyOf(minColumnNames),
                                                             ImmutableList.copyOf(maxColumnNames),
                                                             hasLegacyCounterShards,
                                                             repairedAt));
        components.put(MetadataType.COMPACTION, new CompactionMetadata(ancestors, cardinality));
        return components;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy