org.apache.cassandra.db.rows.BTreeRow (cassandra-all)
The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.db.rows;
import java.nio.ByteBuffer;
import java.util.AbstractCollection;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import com.google.common.collect.Collections2;
import com.google.common.collect.Iterators;
import com.google.common.primitives.Ints;
import org.apache.cassandra.db.Clustering;
import org.apache.cassandra.db.Columns;
import org.apache.cassandra.db.DeletionPurger;
import org.apache.cassandra.db.DeletionTime;
import org.apache.cassandra.db.LivenessInfo;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.db.filter.ColumnFilter;
import org.apache.cassandra.db.partitions.PartitionUpdate;
import org.apache.cassandra.schema.DroppedColumn;
import org.apache.cassandra.utils.AbstractIterator;
import org.apache.cassandra.utils.BiLongAccumulator;
import org.apache.cassandra.utils.BulkIterator;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.LongAccumulator;
import org.apache.cassandra.utils.ObjectSizes;
import org.apache.cassandra.utils.btree.BTree;
import org.apache.cassandra.utils.btree.BTreeSearchIterator;
import org.apache.cassandra.utils.btree.UpdateFunction;
import org.apache.cassandra.utils.memory.Cloner;
/**
* Immutable implementation of a Row object.
*/
public class BTreeRow extends AbstractRow
{
private static final long EMPTY_SIZE = ObjectSizes.measure(emptyRow(Clustering.EMPTY));
private final Clustering<?> clustering;
private final LivenessInfo primaryKeyLivenessInfo;
private final Deletion deletion;
// The data for each column present in this row, in column-sorted order.
private final Object[] btree;
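// These sentinels sort before any real complex column data (static and regular respectively);
// they appear intended as lower-bound search keys, used together with COLUMN_COMPARATOR to
// start a btree traversal at the first complex column of a row.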
private static final ColumnData FIRST_COMPLEX_STATIC = new ComplexColumnData(Columns.FIRST_COMPLEX_STATIC, new Object[0], DeletionTime.build(0, 0));
private static final ColumnData FIRST_COMPLEX_REGULAR = new ComplexColumnData(Columns.FIRST_COMPLEX_REGULAR, new Object[0], DeletionTime.build(0, 0));
private static final Comparator<ColumnData> COLUMN_COMPARATOR = (cd1, cd2) -> cd1.column.compareTo(cd2.column);
// We need to filter the tombstones of a row on every read (twice in fact: first to remove purgeable tombstones, and then after reconciliation to remove
// all tombstones, since we don't return them to the client) as well as on compaction. But it's likely that many rows won't have any tombstones at all, so
// we want to speed up that case by not having to iterate/copy the row. We could keep a single boolean telling us whether we have tombstones,
// but that doesn't work for expiring columns. So instead we keep the deletion time of the first thing in the row to be deleted. This allows us, at any
// given time, to know whether we have any deleted information or not. If we have any "true" tombstone (i.e. not an expiring cell), this value will be
// forced to Long.MIN_VALUE; if we don't but have expiring cells, this will be the time at which the first expiring cell expires. If we have no tombstones
// and no expiring cells, this will be Cell.MAX_DELETION_TIME.
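// To illustrate the three states described above:
//   - the row contains a "true" tombstone        -> minLocalDeletionTime == Long.MIN_VALUE
//   - no tombstones, but some expiring cells     -> minLocalDeletionTime == earliest localExpirationTime
//   - no tombstones and no expiring cells        -> minLocalDeletionTime == Cell.MAX_DELETION_TIME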
private final long minLocalDeletionTime;
private BTreeRow(Clustering<?> clustering,
LivenessInfo primaryKeyLivenessInfo,
Deletion deletion,
Object[] btree,
long minLocalDeletionTime)
{
assert !deletion.isShadowedBy(primaryKeyLivenessInfo);
this.clustering = clustering;
this.primaryKeyLivenessInfo = primaryKeyLivenessInfo;
this.deletion = deletion;
this.btree = btree;
this.minLocalDeletionTime = minLocalDeletionTime;
}
private BTreeRow(Clustering<?> clustering, Object[] btree, long minLocalDeletionTime)
{
this(clustering, LivenessInfo.EMPTY, Deletion.LIVE, btree, minLocalDeletionTime);
}
// Note that it's often easier/safer to use the sortedBuilder/unsortedBuilder or one of the static creation methods below. Only directly useful in a small number of cases.
public static BTreeRow create(Clustering<?> clustering,
LivenessInfo primaryKeyLivenessInfo,
Deletion deletion,
Object[] btree)
{
long minDeletionTime = Math.min(minDeletionTime(primaryKeyLivenessInfo), minDeletionTime(deletion.time()));
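// Only scan the cells if the row-level liveness/deletion doesn't already force Long.MIN_VALUE:
// the fold below can only lower the value, and MIN_VALUE is already the floor.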
if (minDeletionTime != Long.MIN_VALUE)
{
minDeletionTime = BTree.<ColumnData>accumulate(btree, (cd, l) -> Math.min(l, minDeletionTime(cd)), minDeletionTime);
}
return create(clustering, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
}
public static BTreeRow create(Clustering<?> clustering,
LivenessInfo primaryKeyLivenessInfo,
Deletion deletion,
Object[] btree,
long minDeletionTime)
{
return new BTreeRow(clustering, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
}
public static BTreeRow emptyRow(Clustering<?> clustering)
{
return new BTreeRow(clustering, BTree.empty(), Cell.MAX_DELETION_TIME);
}
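// Builds a row containing a single cell. A cell of a complex column (e.g. one element of a
// collection) cannot live directly in the btree, so it is wrapped in a ComplexColumnData
// carrying a live complex deletion.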
public static BTreeRow singleCellRow(Clustering<?> clustering, Cell<?> cell)
{
if (cell.column().isSimple())
return new BTreeRow(clustering, BTree.singleton(cell), minDeletionTime(cell));
ComplexColumnData complexData = new ComplexColumnData(cell.column(), new Cell<?>[]{ cell }, DeletionTime.LIVE);
return new BTreeRow(clustering, BTree.singleton(complexData), minDeletionTime(cell));
}
public static BTreeRow emptyDeletedRow(Clustering<?> clustering, Deletion deletion)
{
assert !deletion.isLive();
return new BTreeRow(clustering, LivenessInfo.EMPTY, deletion, BTree.empty(), Long.MIN_VALUE);
}
public static BTreeRow noCellLiveRow(Clustering<?> clustering, LivenessInfo primaryKeyLivenessInfo)
{
assert !primaryKeyLivenessInfo.isEmpty();
return new BTreeRow(clustering,
primaryKeyLivenessInfo,
Deletion.LIVE,
BTree.empty(),
minDeletionTime(primaryKeyLivenessInfo));
}
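// The minDeletionTime(...) overloads below all encode the convention described for
// minLocalDeletionTime: Long.MIN_VALUE for a "true" tombstone, the local expiration time for
// expiring data, and Cell.MAX_DELETION_TIME when nothing is deleted or expiring.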
private static long minDeletionTime(Cell<?> cell)
{
return cell.isTombstone() ? Long.MIN_VALUE : cell.localDeletionTime();
}
private static long minDeletionTime(LivenessInfo info)
{
return info.isExpiring() ? info.localExpirationTime() : Cell.MAX_DELETION_TIME;
}
private static long minDeletionTime(DeletionTime dt)
{
return dt.isLive() ? Cell.MAX_DELETION_TIME : Long.MIN_VALUE;
}
private static long minDeletionTime(ComplexColumnData cd)
{
long min = minDeletionTime(cd.complexDeletion());
for (Cell<?> cell : cd)
{
min = Math.min(min, minDeletionTime(cell));
if (min == Long.MIN_VALUE)
break;
}
return min;
}
private static long minDeletionTime(ColumnData cd)
{
return cd.column().isSimple() ? minDeletionTime((Cell<?>) cd) : minDeletionTime((ComplexColumnData) cd);
}
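// The apply/accumulate overloads below delegate traversal to BTree so callers can walk or fold
// over this row's ColumnData without allocating an iterator. The variants taking a comparator
// and a 'from' element presumably begin the traversal at that element, which is how the
// FIRST_COMPLEX_* sentinels above would be used as starting bounds.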
public void apply(Consumer<ColumnData> function)
{
BTree.apply(btree, function);
}
public <A> void apply(BiConsumer<ColumnData, A> function, A arg)
{
BTree.apply(btree, function, arg);
}
public long accumulate(LongAccumulator<ColumnData> accumulator, long initialValue)
{
return BTree.accumulate(btree, accumulator, initialValue);
}
public long accumulate(LongAccumulator<ColumnData> accumulator, Comparator<ColumnData> comparator, ColumnData from, long initialValue)
{
return BTree.accumulate(btree, accumulator, comparator, from, initialValue);
}
public <A> long accumulate(BiLongAccumulator<ColumnData, A> accumulator, A arg, long initialValue)
{
return BTree.accumulate(btree, accumulator, arg, initialValue);
}
public <A> long accumulate(BiLongAccumulator<ColumnData, A> accumulator, A arg, Comparator<ColumnData> comparator, ColumnData from, long initialValue)
{
return BTree.accumulate(btree, accumulator, arg, comparator, from, initialValue);
}
private static long minDeletionTime(Object[] btree, LivenessInfo info, DeletionTime rowDeletion)
{
long min = Math.min(minDeletionTime(info), minDeletionTime(rowDeletion));
return BTree.<ColumnData>accumulate(btree, (cd, l) -> Math.min(l, minDeletionTime(cd)), min);
}
public Clustering<?> clustering()
{
return clustering;
}
public Collection<ColumnMetadata> columns()
{
return Collections2.transform(columnData(), ColumnData::column);
}
public int columnCount()
{
return BTree.size(btree);
}
public LivenessInfo primaryKeyLivenessInfo()
{
return primaryKeyLivenessInfo;
}
public boolean isEmpty()
{
return primaryKeyLivenessInfo().isEmpty()
&& deletion().isLive()
&& BTree.isEmpty(btree);
}
public Deletion deletion()
{
return deletion;
}
public Cell<?> getCell(ColumnMetadata c)
{
assert !c.isComplex();
return (Cell<?>) BTree.<Object>find(btree, ColumnMetadata.asymmetricColumnDataComparator, c);
}