org.apache.cassandra.db.ColumnIndex Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of cassandra-all Show documentation
The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
There is a newer version: 5.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;

import com.google.common.annotations.VisibleForTesting;

import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.composites.Composite;
import org.apache.cassandra.io.sstable.IndexHelper;
import org.apache.cassandra.io.util.DataOutputPlus;
import org.apache.cassandra.utils.ByteBufferUtil;

public class ColumnIndex
{
    public final List columnsIndex;

    private static final ColumnIndex EMPTY = new ColumnIndex(Collections.emptyList());

    private ColumnIndex(List columnsIndex)
    {
        assert columnsIndex != null;

        this.columnsIndex = columnsIndex;
    }

    @VisibleForTesting
    public static ColumnIndex nothing()
    {
        return EMPTY;
    }

    /**
     * Help to create an index for a column family based on size of columns,
     * and write said columns to disk.
     */
    public static class Builder
    {
        private final ColumnIndex result;
        private final long indexOffset;
        private long startPosition = -1;
        private long endPosition = 0;
        private long blockSize;
        private OnDiskAtom firstColumn;
        private OnDiskAtom lastColumn;
        private OnDiskAtom lastBlockClosing;
        private final DataOutputPlus output;
        private final RangeTombstone.Tracker tombstoneTracker;
        private int atomCount;
        private final ByteBuffer key;
        private final DeletionInfo deletionInfo; // only used for serializing and calculating row header size

        private final OnDiskAtom.SerializerForWriting atomSerializer;

        public Builder(ColumnFamily cf,
                       ByteBuffer key,
                       DataOutputPlus output)
        {
            this(cf, key, output, cf.getComparator().onDiskAtomSerializer());
        }

        public Builder(ColumnFamily cf,
                ByteBuffer key,
                DataOutputPlus output,
                OnDiskAtom.SerializerForWriting serializer)
        {
            assert cf != null;
            assert key != null;
            assert output != null;

            this.key = key;
            deletionInfo = cf.deletionInfo();
            this.indexOffset = rowHeaderSize(key, deletionInfo);
            this.result = new ColumnIndex(new ArrayList());
            this.output = output;
            this.tombstoneTracker = new RangeTombstone.Tracker(cf.getComparator());
            this.atomSerializer = serializer;
        }

        /**
         * Returns the number of bytes between the beginning of the row and the
         * first serialized column.
         */
        private static long rowHeaderSize(ByteBuffer key, DeletionInfo delInfo)
        {
            TypeSizes typeSizes = TypeSizes.NATIVE;
            // TODO fix constantSize when changing the nativeconststs.
            int keysize = key.remaining();
            return typeSizes.sizeof((short) keysize) + keysize          // Row key
                 + DeletionTime.serializer.serializedSize(delInfo.getTopLevelDeletion(), typeSizes);
        }

        public RangeTombstone.Tracker tombstoneTracker()
        {
            return tombstoneTracker;
        }

        public int writtenAtomCount()
        {
            return atomCount + tombstoneTracker.writtenAtom();
        }

        /**
         * Serializes the index into in-memory structure with all required components
         * such as Bloom Filter, index block size, IndexInfo list
         *
         * @param cf Column family to create index for
         *
         * @return information about index - it's Bloom Filter, block size and IndexInfo list
         */
        public ColumnIndex build(ColumnFamily cf) throws IOException
        {
            // cf has disentangled the columns and range tombstones, we need to re-interleave them in comparator order
            Comparator comparator = cf.getComparator();
            DeletionInfo.InOrderTester tester = cf.deletionInfo().inOrderTester();
            Iterator rangeIter = cf.deletionInfo().rangeIterator();
            RangeTombstone tombstone = rangeIter.hasNext() ? rangeIter.next() : null;

            for (Cell c : cf)
            {
                while (tombstone != null && comparator.compare(c.name(), tombstone.min) >= 0)
                {
                    // skip range tombstones that are shadowed by partition tombstones
                    if (!cf.deletionInfo().getTopLevelDeletion().isDeleted(tombstone))
                        add(tombstone);
                    tombstone = rangeIter.hasNext() ? rangeIter.next() : null;
                }

                // We can skip any cell if it's shadowed by a tombstone already. This is a more
                // general case than was handled by CASSANDRA-2589.
                if (!tester.isDeleted(c))
                    add(c);
            }

            while (tombstone != null)
            {
                add(tombstone);
                tombstone = rangeIter.hasNext() ? rangeIter.next() : null;
            }
            finishAddingAtoms();
            ColumnIndex index = build();

            maybeWriteEmptyRowHeader();

            return index;
        }

        /**
         * The important distinction wrt build() is that we may be building for a row that ends up
         * being compacted away entirely, i.e., the input consists only of expired tombstones (or
         * columns shadowed by expired tombstone).  Thus, it is the caller's responsibility
         * to decide whether to write the header for an empty row.
         */
        public ColumnIndex buildForCompaction(Iterator columns) throws IOException
        {
            while (columns.hasNext())
            {
                OnDiskAtom c =  columns.next();
                add(c);
            }
            finishAddingAtoms();

            return build();
        }

        public void add(OnDiskAtom column) throws IOException
        {
            atomCount++;

            if (firstColumn == null)
            {
                firstColumn = column;
                startPosition = endPosition;
                // TODO: have that use the firstColumn as min + make sure we optimize that on read
                endPosition += tombstoneTracker.writeOpenedMarkers(firstColumn.name(), output, atomSerializer);
                blockSize = 0; // We don't count repeated tombstone marker in the block size, to avoid a situation
                               // where we wouldn't make any progress because a block is filled by said marker

                maybeWriteRowHeader();
            }

            if (tombstoneTracker.update(column, false))
            {
                long size = tombstoneTracker.writeUnwrittenTombstones(output, atomSerializer);
                size += atomSerializer.serializedSizeForSSTable(column);
                endPosition += size;
                blockSize += size;

                atomSerializer.serializeForSSTable(column, output);
            }

            lastColumn = column;

            // if we hit the column index size that we have to index after, go ahead and index it.
            if (blockSize >= DatabaseDescriptor.getColumnIndexSize())
            {
                IndexHelper.IndexInfo cIndexInfo = new IndexHelper.IndexInfo(firstColumn.name(), column.name(), indexOffset + startPosition, endPosition - startPosition);
                result.columnsIndex.add(cIndexInfo);
                firstColumn = null;
                lastBlockClosing = column;
            }
        }

        private void maybeWriteRowHeader() throws IOException
        {
            if (lastColumn == null)
            {
                ByteBufferUtil.writeWithShortLength(key, output);
                DeletionTime.serializer.serialize(deletionInfo.getTopLevelDeletion(), output);
            }
        }

        public void finishAddingAtoms() throws IOException
        {
            long size = tombstoneTracker.writeUnwrittenTombstones(output, atomSerializer);
            endPosition += size;
            blockSize += size;
        }

        public ColumnIndex build()
        {
            assert !tombstoneTracker.hasUnwrittenTombstones();  // finishAddingAtoms must be called before building.
            // all columns were GC'd after all
            if (lastColumn == null)
                return ColumnIndex.EMPTY;

            // the last column may have fallen on an index boundary already.  if not, index it explicitly.
            if (result.columnsIndex.isEmpty() || lastBlockClosing != lastColumn)
            {
                IndexHelper.IndexInfo cIndexInfo = new IndexHelper.IndexInfo(firstColumn.name(), lastColumn.name(), indexOffset + startPosition, endPosition - startPosition);
                result.columnsIndex.add(cIndexInfo);
            }

            // we should always have at least one computed index block, but we only write it out if there is more than that.
            assert result.columnsIndex.size() > 0;
            return result;
        }

        public void maybeWriteEmptyRowHeader() throws IOException
        {
            if (!deletionInfo.isLive())
                maybeWriteRowHeader();
        }
    }
}