All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.cassandra.db.partitions.CachedBTreePartition Maven / Gradle / Ivy

Go to download

The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.

There is a newer version: 5.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db.partitions;

import java.io.IOException;

import org.apache.cassandra.db.*;
import org.apache.cassandra.db.filter.DataLimits;
import org.apache.cassandra.db.rows.*;
import org.apache.cassandra.io.ISerializer;
import org.apache.cassandra.io.util.DataInputPlus;
import org.apache.cassandra.io.util.DataOutputPlus;
import org.apache.cassandra.net.MessagingService;
import org.apache.cassandra.schema.Schema;
import org.apache.cassandra.schema.TableId;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.utils.CassandraUInt;
import org.apache.cassandra.utils.btree.BTree;

public class CachedBTreePartition extends ImmutableBTreePartition implements CachedPartition
{
    private final long createdAtInSec;

    private final int cachedLiveRows;
    private final int rowsWithNonExpiringCells;

    private CachedBTreePartition(TableMetadata metadata,
                                 DecoratedKey partitionKey,
                                 BTreePartitionData holder,
                                 long createdAtInSec,
                                 int cachedLiveRows,
                                 int rowsWithNonExpiringCells)
    {
        super(metadata, partitionKey, holder);
        this.createdAtInSec = createdAtInSec;
        this.cachedLiveRows = cachedLiveRows;
        this.rowsWithNonExpiringCells = rowsWithNonExpiringCells;
    }

    /**
     * Creates an {@code ArrayBackedCachedPartition} holding all the data of the provided iterator.
     *
     * Warning: Note that this method does not close the provided iterator and it is
     * up to the caller to do so.
     *
     * @param iterator the iterator got gather in memory.
     * @param nowInSec the time of the creation in seconds. This is the time at which {@link #cachedLiveRows} applies.
     * @return the created partition.
     */
    public static CachedBTreePartition create(UnfilteredRowIterator iterator, long nowInSec)
    {
        return create(iterator, 16, nowInSec);
    }

    /**
     * Creates an {@code ArrayBackedCachedPartition} holding all the data of the provided iterator.
     *
     * Warning: Note that this method does not close the provided iterator and it is
     * up to the caller to do so.
     *
     * @param iterator the iterator got gather in memory.
     * @param initialRowCapacity sizing hint (in rows) to use for the created partition. It should ideally
     * correspond or be a good estimation of the number or rows in {@code iterator}.
     * @param nowInSec the time of the creation in seconds. This is the time at which {@link #cachedLiveRows} applies.
     * @return the created partition.
     */
    public static CachedBTreePartition create(UnfilteredRowIterator iterator, int initialRowCapacity, long nowInSec)
    {
        BTreePartitionData holder = ImmutableBTreePartition.build(iterator, initialRowCapacity);

        int cachedLiveRows = 0;
        int rowsWithNonExpiringCells = 0;
        boolean enforceStrictLiveness = iterator.metadata().enforceStrictLiveness();

        for (Row row : BTree.iterable(holder.tree))
        {
            if (row.hasLiveData(nowInSec, enforceStrictLiveness))
                ++cachedLiveRows;

            boolean hasNonExpiringLiveCell = false;
            for (Cell cell : row.cells())
            {
                if (!cell.isTombstone() && !cell.isExpiring())
                {
                    hasNonExpiringLiveCell = true;
                    break;
                }
            }

            if (hasNonExpiringLiveCell)
                ++rowsWithNonExpiringCells;
        }

        return new CachedBTreePartition(iterator.metadata(),
                                        iterator.partitionKey(),
                                        holder,
                                        nowInSec,
                                        cachedLiveRows,
                                        rowsWithNonExpiringCells);
    }

    /**
     * The number of rows that were live at the time the partition was cached.
     *
     * See {@link ColumnFamilyStore#isFilterFullyCoveredBy} to see why we need this.
     *
     * @return the number of rows in this partition that were live at the time the
     * partition was cached (this can be different from the number of live rows now
     * due to expiring cells).
     */
    public int cachedLiveRows()
    {
        return cachedLiveRows;
    }

    /**
     * The number of rows in this cached partition that have at least one non-expiring
     * non-deleted cell.
     *
     * Note that this is generally not a very meaningful number, but this is used by
     * {@link DataLimits#hasEnoughLiveData} as an optimization.
     *
     * @return the number of row that have at least one non-expiring non-deleted cell.
     */
    public int rowsWithNonExpiringCells()
    {
        return rowsWithNonExpiringCells;
    }

    static class Serializer implements ISerializer
    {
        public void serialize(CachedPartition partition, DataOutputPlus out) throws IOException
        {
            int version = MessagingService.current_version;

            assert partition instanceof CachedBTreePartition;
            CachedBTreePartition p = (CachedBTreePartition)partition;

            out.writeInt(CassandraUInt.fromLong(p.createdAtInSec));
            out.writeInt(p.cachedLiveRows);
            out.writeInt(p.rowsWithNonExpiringCells);
            partition.metadata().id.serialize(out);
            try (UnfilteredRowIterator iter = p.unfilteredIterator())
            {
                UnfilteredRowIteratorSerializer.serializer.serialize(iter, null, out, version, p.rowCount());
            }
        }

        public CachedPartition deserialize(DataInputPlus in) throws IOException
        {
            int version = MessagingService.current_version;

            // Note that it would be slightly simpler to just do
            //   ArrayBackedCachedPiartition.create(UnfilteredRowIteratorSerializer.serializer.deserialize(...));
            // However deserializing the header separatly is not a lot harder and allows us to:
            //   1) get the capacity of the partition so we can size it properly directly
            //   2) saves the creation of a temporary iterator: rows are directly written to the partition, which
            //      is slightly faster.

            long createdAtInSec = CassandraUInt.toLong(in.readInt());
            int cachedLiveRows = in.readInt();
            int rowsWithNonExpiringCells = in.readInt();


            TableMetadata metadata = Schema.instance.getExistingTableMetadata(TableId.deserialize(in));
            UnfilteredRowIteratorSerializer.Header header = UnfilteredRowIteratorSerializer.serializer.deserializeHeader(metadata, null, in, version, DeserializationHelper.Flag.LOCAL);
            assert !header.isReversed && header.rowEstimate >= 0;

            BTreePartitionData holder;
            try (UnfilteredRowIterator partition = UnfilteredRowIteratorSerializer.serializer.deserialize(in, version, metadata, DeserializationHelper.Flag.LOCAL, header))
            {
                holder = ImmutableBTreePartition.build(partition, header.rowEstimate);
            }

            return new CachedBTreePartition(metadata,
                                            header.key,
                                            holder,
                                            createdAtInSec,
                                            cachedLiveRows,
                                            rowsWithNonExpiringCells);

        }

        public long serializedSize(CachedPartition partition)
        {
            int version = MessagingService.current_version;

            assert partition instanceof CachedBTreePartition;
            CachedBTreePartition p = (CachedBTreePartition)partition;

            try (UnfilteredRowIterator iter = p.unfilteredIterator())
            {
                return TypeSizes.INT_SIZE //createdAtInSec
                     + TypeSizes.sizeof(p.cachedLiveRows)
                     + TypeSizes.sizeof(p.rowsWithNonExpiringCells)
                     + partition.metadata().id.serializedSize()
                     + UnfilteredRowIteratorSerializer.serializer.serializedSize(iter, null, MessagingService.current_version, p.rowCount());
            }
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy