com.palantir.atlasdb.keyvalue.cassandra.CellLoadingBatcher Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of atlasdb-cassandra Show documentation
Show all versions of atlasdb-cassandra Show documentation
Palantir open source project
The newest version!
/*
* (c) Copyright 2019 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.palantir.atlasdb.keyvalue.cassandra;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.MultimapBuilder;
import com.google.common.collect.Multimaps;
import com.google.common.primitives.UnsignedBytes;
import com.palantir.atlasdb.cassandra.CassandraCellLoadingConfig;
import com.palantir.atlasdb.keyvalue.api.Cell;
import com.palantir.atlasdb.keyvalue.api.TableReference;
import com.palantir.atlasdb.keyvalue.cassandra.pool.CassandraServer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
/**
* Divides a list of {@link Cell}s into batches for querying.
*
* The batcher partitions cells by columns.
* If for a given column the number of cells provided is at least
* {@link CassandraCellLoadingConfig#crossColumnLoadBatchLimit()}, then the cells for that column will exclusively
* occupy one or more batches, with no batch having size greater than
* {@link CassandraCellLoadingConfig#singleQueryLoadBatchLimit()}.
* Otherwise, the cells provided may be combined with cells for other columns in batches of size up to the value
* from {@link CassandraCellLoadingConfig#crossColumnLoadBatchLimit()}. There is no guarantee that all cells for this
* column will be in the same batch in this case.
*
* Live reloading: Batching will take place following some {@link CassandraCellLoadingConfig} available from
* the supplier during the execution of a partition operation. There is no guarantee as to whether new values
* available during a partition operation will or will not be applied.
*/
final class CellLoadingBatcher {
private final Supplier loadingConfigSupplier;
private final BatchCallback rebatchingManyRowsForColumnCallback;
CellLoadingBatcher(
Supplier loadingConfigSupplier,
BatchCallback rebatchingManyRowsForColumnCallback) {
this.loadingConfigSupplier = loadingConfigSupplier;
this.rebatchingManyRowsForColumnCallback = rebatchingManyRowsForColumnCallback;
}
List> partitionIntoBatches(
Collection cellsToPartition, CassandraServer cassandraServer, TableReference tableReference) {
CassandraCellLoadingConfig config = loadingConfigSupplier.get();
ListMultimap cellsByColumn = indexCellsByColumnName(cellsToPartition);
List> batches = new ArrayList<>();
List cellsForCrossColumnBatching = new ArrayList<>();
for (Map.Entry> cellColumnPair :
Multimaps.asMap(cellsByColumn).entrySet()) {
if (shouldExplicitlyAllocateBatchToColumn(config, cellColumnPair.getValue())) {
batches.addAll(partitionBySingleQueryLoadBatchLimit(
cellColumnPair.getValue(), config, cassandraServer, tableReference));
} else {
cellsForCrossColumnBatching.addAll(cellColumnPair.getValue());
}
}
batches.addAll(Lists.partition(cellsForCrossColumnBatching, config.crossColumnLoadBatchLimit()));
return batches;
}
private List> partitionBySingleQueryLoadBatchLimit(
List cells,
CassandraCellLoadingConfig config,
CassandraServer cassandraServer,
TableReference tableReference) {
if (cells.size() > config.singleQueryLoadBatchLimit()) {
rebatchingManyRowsForColumnCallback.consume(cassandraServer, tableReference, cells.size());
return Lists.partition(cells, config.singleQueryLoadBatchLimit());
}
return ImmutableList.of(cells);
}
private static boolean shouldExplicitlyAllocateBatchToColumn(CassandraCellLoadingConfig config, List cells) {
return cells.size() > config.crossColumnLoadBatchLimit();
}
private static ListMultimap indexCellsByColumnName(Collection cells) {
// Cannot use Multimaps.index(), because byte[] equality is tricky.
ListMultimap cellsByColumn = MultimapBuilder.treeKeys(UnsignedBytes.lexicographicalComparator())
.arrayListValues()
.build();
for (Cell cell : cells) {
cellsByColumn.put(cell.getColumnName(), cell);
}
return cellsByColumn;
}
@FunctionalInterface
interface BatchCallback {
void consume(CassandraServer cassandraServer, TableReference tableReference, int numRows);
}
}
| | | | |