io.datakernel.aggregation.AggregationState
Log-structured merge-tree table with fields representing aggregate functions, designed for OLAP workloads.
/*
* Copyright (C) 2015-2018 SoftIndex LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.datakernel.aggregation;
import io.datakernel.aggregation.AggregationPredicates.RangeScan;
import io.datakernel.aggregation.ot.AggregationDiff;
import io.datakernel.aggregation.ot.AggregationStructure;
import io.datakernel.ot.OTState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import static io.datakernel.aggregation.AggregationPredicates.toRangeScan;
import static io.datakernel.util.CollectionUtils.intersection;
import static io.datakernel.util.Preconditions.checkArgument;
/**
* Represents aggregation metadata. Stores chunks in an index (represented by an array of {@link RangeTree}) for efficient search.
* Provides methods for managing the index, querying for chunks by key, and searching for chunks that are available for consolidation.
*/
public final class AggregationState implements OTState<AggregationDiff> {
private static final Logger logger = LoggerFactory.getLogger(AggregationState.class);
private final AggregationStructure aggregation;
private final Map<Object, AggregationChunk> chunks = new LinkedHashMap<>();
private RangeTree<PrimaryKey, AggregationChunk>[] prefixRanges;
private static final int EQUALS_QUERIES_THRESHOLD = 1_000;
private static final Comparator<AggregationChunk> MIN_KEY_ASCENDING_COMPARATOR = new Comparator<AggregationChunk>() {
@Override
public int compare(AggregationChunk chunk1, AggregationChunk chunk2) {
return chunk1.getMinPrimaryKey().compareTo(chunk2.getMinPrimaryKey());
}
};
@SuppressWarnings("unchecked")
AggregationState(AggregationStructure aggregation) {
this.aggregation = aggregation;
initIndex();
}
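/*
 * Illustrative usage sketch, not part of the original source; assumes an
 * AggregationStructure and an AggregationDiff built elsewhere:
 *
 *   AggregationState state = new AggregationState(structure); // package-private constructor
 *   state.apply(diff);                                        // index added chunks, drop removed ones
 *   List<AggregationChunk> candidates =
 *           state.findChunksForConsolidationHotSegment(100);  // pick up to 100 chunks to consolidate
 */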
public Map<Object, AggregationChunk> getChunks() {
return Collections.unmodifiableMap(chunks);
}
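/**
 * Applies an OT diff to this state: newly added chunks are put into the index,
 * removed chunks are taken out of it.
 */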
@Override
public void apply(AggregationDiff commit) {
for (AggregationChunk chunk : commit.getAddedChunks()) {
addToIndex(chunk);
}
for (AggregationChunk chunk : commit.getRemovedChunks()) {
removeFromIndex(chunk);
}
}
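/**
 * Registers a chunk in the chunk map and in every prefix range tree
 * (one tree per key-prefix length, from 0 up to the full key length),
 * keyed by the prefixes of its min and max primary keys.
 */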
public void addToIndex(AggregationChunk chunk) {
for (int size = 0; size <= aggregation.getKeys().size(); size++) {
RangeTree<PrimaryKey, AggregationChunk> index = prefixRanges[size];
PrimaryKey lower = chunk.getMinPrimaryKey().prefix(size);
PrimaryKey upper = chunk.getMaxPrimaryKey().prefix(size);
index.put(lower, upper, chunk);
}
chunks.put(chunk.getChunkId(), chunk);
}
public void removeFromIndex(AggregationChunk chunk) {
for (int size = 0; size <= aggregation.getKeys().size(); size++) {
RangeTree<PrimaryKey, AggregationChunk> index = prefixRanges[size];
PrimaryKey lower = chunk.getMinPrimaryKey().prefix(size);
PrimaryKey upper = chunk.getMaxPrimaryKey().prefix(size);
index.remove(lower, upper, chunk);
}
chunks.remove(chunk.getChunkId());
}
void initIndex() {
this.prefixRanges = new RangeTree[aggregation.getKeys().size() + 1];
for (int size = 0; size <= aggregation.getKeys().size(); size++) {
this.prefixRanges[size] = RangeTree.create();
}
}
@Override
public void init() {
initIndex();
chunks.clear();
}
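// Overlap count of a segment: the size of its set plus the size of its closing set,
// i.e. how many chunk ranges touch this segment.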
private static int getNumberOfOverlaps(RangeTree.Segment<AggregationChunk> segment) {
return segment.getSet().size() + segment.getClosingSet().size();
}
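/**
 * Returns every chunk that shares a full-key segment with at least one other chunk,
 * i.e. all chunks whose key ranges overlap and are therefore candidates for consolidation.
 */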
public Set<AggregationChunk> findOverlappingChunks() {
int minOverlaps = 2;
Set<AggregationChunk> result = new HashSet<>();
RangeTree<PrimaryKey, AggregationChunk> tree = prefixRanges[aggregation.getKeys().size()];
for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tree.getSegments().entrySet()) {
RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
int overlaps = getNumberOfOverlaps(segment);
if (overlaps >= minOverlaps) {
result.addAll(segment.getSet());
result.addAll(segment.getClosingSet());
}
}
return result;
}
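/**
 * Finds the group of chunks covering the "hottest" segment: the segment of the
 * full-key range tree with the highest overlap count (at least 2).
 */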
public List<AggregationChunk> findChunksGroupWithMostOverlaps() {
return findChunksGroupWithMostOverlaps(prefixRanges[aggregation.getKeys().size()]);
}
private static List<AggregationChunk> findChunksGroupWithMostOverlaps(RangeTree<PrimaryKey, AggregationChunk> tree) {
int maxOverlaps = 2;
List<AggregationChunk> result = new ArrayList<>();
for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tree.getSegments().entrySet()) {
RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
int overlaps = getNumberOfOverlaps(segment);
if (overlaps >= maxOverlaps) {
maxOverlaps = overlaps;
result.clear();
result.addAll(segment.getSet());
result.addAll(segment.getClosingSet());
}
}
return result;
}
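// Walks the partitions in ascending key order and returns the first partition that yields
// at least two candidate chunks under the "min key" / "size fix" strategies below.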
private static PickedChunks findChunksWithMinKeyOrSizeFixStrategy(SortedMap<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> partitioningKeyToTree,
int maxChunks, int optimalChunkSize) {
int minChunks = 2;
for (Map.Entry<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> entry : partitioningKeyToTree.entrySet()) {
ChunksAndStrategy chunksAndStrategy = findChunksWithMinKeyOrSizeFixStrategy(entry.getValue(), maxChunks, optimalChunkSize);
if (chunksAndStrategy.chunks.size() >= minChunks)
return new PickedChunks(chunksAndStrategy.strategy, entry.getValue(), chunksAndStrategy.chunks);
}
return new PickedChunks(PickingStrategy.MIN_KEY, null, Collections.emptyList());
}
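// "Min key" strategy: return the chunks of the first segment with two or more overlapping chunks.
// "Size fix" strategy (used when nothing overlaps): starting from the first single chunk whose size
// differs from optimalChunkSize, collect up to maxChunks subsequent chunks, presumably so they can
// be re-cut closer to the optimal size. A single chunk is returned only if it is oversized.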
private static ChunksAndStrategy findChunksWithMinKeyOrSizeFixStrategy(RangeTree<PrimaryKey, AggregationChunk> tree,
int maxChunks, int optimalChunkSize) {
int minOverlaps = 2;
List<AggregationChunk> result = new ArrayList<>();
SortedMap<PrimaryKey, RangeTree.Segment<AggregationChunk>> tailMap = null;
for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tree.getSegments().entrySet()) {
RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
int overlaps = getNumberOfOverlaps(segment);
// "min key" strategy
if (overlaps >= minOverlaps) {
result.addAll(segment.getSet());
result.addAll(segment.getClosingSet());
return new ChunksAndStrategy(PickingStrategy.MIN_KEY, result);
}
List<AggregationChunk> segmentChunks = new ArrayList<>();
segmentChunks.addAll(segment.getSet());
segmentChunks.addAll(segment.getClosingSet());
// "size fix" strategy
if (overlaps == 1 && segmentChunks.get(0).getCount() != optimalChunkSize) {
tailMap = tree.getSegments().tailMap(segmentEntry.getKey());
break;
}
}
if (tailMap == null)
return new ChunksAndStrategy(PickingStrategy.SIZE_FIX, Collections.emptyList());
Set<AggregationChunk> chunks = new HashSet<>();
for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tailMap.entrySet()) {
if (chunks.size() >= maxChunks)
break;
RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
chunks.addAll(segment.getSet());
chunks.addAll(segment.getClosingSet());
}
result.addAll(chunks);
if (result.size() == 1) {
if (result.get(0).getCount() > optimalChunkSize)
return new ChunksAndStrategy(PickingStrategy.SIZE_FIX, result);
else
return new ChunksAndStrategy(PickingStrategy.SIZE_FIX, Collections.emptyList());
}
return new ChunksAndStrategy(PickingStrategy.SIZE_FIX, result);
}
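// How a group of chunks was picked (as used in this class):
// PARTITIONING - chunks straddling partition boundaries that must be repartitioned first;
// HOT_SEGMENT  - chunks taken from the segment with the most overlaps;
// MIN_KEY      - chunks from the first overlapping segment in key order;
// SIZE_FIX     - non-overlapping chunks grouped to fix suboptimal chunk sizes.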
private enum PickingStrategy {
PARTITIONING,
HOT_SEGMENT,
MIN_KEY,
SIZE_FIX
}
private static class PickedChunks {
private final PickingStrategy strategy;
private final RangeTree<PrimaryKey, AggregationChunk> partitionTree;
private final List<AggregationChunk> chunks;
public PickedChunks(PickingStrategy strategy, RangeTree<PrimaryKey, AggregationChunk> partitionTree,
List<AggregationChunk> chunks) {
this.strategy = strategy;
this.partitionTree = partitionTree;
this.chunks = chunks;
}
}
private static class ChunksAndStrategy {
private final PickingStrategy strategy;
private final List<AggregationChunk> chunks;
public ChunksAndStrategy(PickingStrategy strategy, List<AggregationChunk> chunks) {
this.strategy = strategy;
this.chunks = chunks;
}
}
// visibleForTest
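// Groups all chunks by the value of their partitioning-key prefix, building one range tree
// per partition. Returns null if any chunk spans more than one partition (its min and max
// keys have different prefixes), i.e. the aggregation is not yet partitioned.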
SortedMap<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> groupByPartition(int partitioningKeyLength) {
SortedMap<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> partitioningKeyToTree = new TreeMap<>();
Set<AggregationChunk> allChunks = prefixRanges[0].getAll();
for (AggregationChunk chunk : allChunks) {
PrimaryKey minKeyPrefix = chunk.getMinPrimaryKey().prefix(partitioningKeyLength);
PrimaryKey maxKeyPrefix = chunk.getMaxPrimaryKey().prefix(partitioningKeyLength);
if (!minKeyPrefix.equals(maxKeyPrefix))
return null; // not partitioned
RangeTree<PrimaryKey, AggregationChunk> tree = partitioningKeyToTree.get(minKeyPrefix);
if (tree == null) {
tree = RangeTree.create();
partitioningKeyToTree.put(minKeyPrefix, tree);
}
tree.put(chunk.getMinPrimaryKey(), chunk.getMaxPrimaryKey(), chunk);
}
return partitioningKeyToTree;
}
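// Collects up to maxChunks chunks (in ascending min-key order) whose min and max keys fall
// into different partitions; these are the chunks that need to be split by partition.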
private List<AggregationChunk> findChunksForPartitioning(int partitioningKeyLength, int maxChunks) {
List<AggregationChunk> chunksForPartitioning = new ArrayList<>();
List<AggregationChunk> allChunks = new ArrayList<>(prefixRanges[0].getAll());
Collections.sort(allChunks, MIN_KEY_ASCENDING_COMPARATOR);
for (AggregationChunk chunk : allChunks) {
if (chunksForPartitioning.size() == maxChunks)
break;
PrimaryKey minKeyPrefix = chunk.getMinPrimaryKey().prefix(partitioningKeyLength);
PrimaryKey maxKeyPrefix = chunk.getMaxPrimaryKey().prefix(partitioningKeyLength);
if (!minKeyPrefix.equals(maxKeyPrefix))
chunksForPartitioning.add(chunk);
}
return chunksForPartitioning;
}
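/**
 * Picks chunks for consolidation using the "min key" strategy. If the aggregation is not yet
 * partitioned by its partitioning key, the chunks crossing partition boundaries are returned
 * instead, so that partitioning runs first.
 */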
public List<AggregationChunk> findChunksForConsolidationMinKey(int maxChunks, int optimalChunkSize) {
int partitioningKeyLength = aggregation.getPartitioningKey().size();
SortedMap<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> partitioningKeyToTree = groupByPartition(partitioningKeyLength);
if (partitioningKeyToTree == null) { // not partitioned
List<AggregationChunk> chunks = findChunksForPartitioning(partitioningKeyLength, maxChunks);
logChunksAndStrategy(chunks, PickingStrategy.PARTITIONING);
return chunks; // launch partitioning
}
PickedChunks pickedChunks = findChunksWithMinKeyOrSizeFixStrategy(partitioningKeyToTree, maxChunks, optimalChunkSize);
return processSelection(pickedChunks.chunks, maxChunks, pickedChunks.partitionTree, pickedChunks.strategy);
}
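/**
 * Picks chunks for consolidation using the "hot segment" strategy: the group of chunks
 * with the most overlaps on the full primary key.
 */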
public List<AggregationChunk> findChunksForConsolidationHotSegment(int maxChunks) {
RangeTree<PrimaryKey, AggregationChunk> tree = prefixRanges[aggregation.getKeys().size()];
List<AggregationChunk> chunks = findChunksGroupWithMostOverlaps(tree);
return processSelection(chunks, maxChunks, tree, PickingStrategy.HOT_SEGMENT);
}
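// Post-processes a picked group: trims it to maxChunks (by ascending min key) if it is too large,
// or, for the overlap-based strategies, expands it with neighbouring chunks from the same key
// range until maxChunks is reached or no new chunks are found.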
private static List<AggregationChunk> processSelection(List<AggregationChunk> chunks, int maxChunks,
RangeTree<PrimaryKey, AggregationChunk> partitionTree,
PickingStrategy strategy) {
if (chunks.isEmpty() || chunks.size() == maxChunks) {
logChunksAndStrategy(chunks, strategy);
return chunks;
}
if (chunks.size() > maxChunks) {
List<AggregationChunk> trimmedChunks = trimChunks(chunks, maxChunks);
logChunksAndStrategy(trimmedChunks, strategy);
return trimmedChunks;
}
if (strategy == PickingStrategy.SIZE_FIX) {
logChunksAndStrategy(chunks, strategy);
return chunks;
}
List<AggregationChunk> expandedChunks = expandRange(partitionTree, chunks, maxChunks);
if (expandedChunks.size() > maxChunks) {
List<AggregationChunk> trimmedChunks = trimChunks(expandedChunks, maxChunks);
logChunksAndStrategy(trimmedChunks, strategy);
return trimmedChunks;
}
logChunksAndStrategy(expandedChunks, strategy);
return expandedChunks;
}
private static void logChunksAndStrategy(Collection<AggregationChunk> chunks, PickingStrategy strategy) {
if (logger.isInfoEnabled()) {
String chunkIds = chunks.stream()
.map(AggregationChunk::getChunkId)
.map(Object::toString)
.collect(Collectors.joining(",", "[", "]"));
logger.info("Chunks for consolidation {}: {}. Strategy: {}", chunks.size(), chunkIds, strategy);
}
}
private static List<AggregationChunk> trimChunks(List<AggregationChunk> chunks, int maxChunks) {
Collections.sort(chunks, MIN_KEY_ASCENDING_COMPARATOR);
return chunks.subList(0, maxChunks);
}
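// Adds to the set every chunk that falls within the combined [min key, max key] range of the
// chunks already selected; returns true if the set grew.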
private static boolean expandRange(RangeTree<PrimaryKey, AggregationChunk> tree, Set<AggregationChunk> chunks) {
PrimaryKey minKey = null;
PrimaryKey maxKey = null;
for (AggregationChunk chunk : chunks) {
PrimaryKey chunkMinKey = chunk.getMinPrimaryKey();
PrimaryKey chunkMaxKey = chunk.getMaxPrimaryKey();
if (minKey == null) {
minKey = chunkMinKey;
maxKey = chunkMaxKey;
continue;
}
if (chunkMinKey.compareTo(minKey) < 0)
minKey = chunkMinKey;
if (chunkMaxKey.compareTo(maxKey) > 0)
maxKey = chunkMaxKey;
}
Set<AggregationChunk> chunksForRange = tree.getRange(minKey, maxKey);
return chunks.addAll(chunksForRange);
}
private static void expandRange(RangeTree<PrimaryKey, AggregationChunk> tree, Set<AggregationChunk> chunks, int maxChunks) {
boolean expand = chunks.size() < maxChunks;
while (expand) {
boolean expanded = expandRange(tree, chunks);
expand = expanded && chunks.size() < maxChunks;
}
}
private static List<AggregationChunk> expandRange(RangeTree<PrimaryKey, AggregationChunk> tree,
List<AggregationChunk> chunks, int maxChunks) {
Set<AggregationChunk> chunkSet = new HashSet<>(chunks);
expandRange(tree, chunkSet, maxChunks);
return new ArrayList<>(chunkSet);
}
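// Diagnostic view of the full-key range tree: for each segment, the chunks in its set and
// closing set and the resulting overlap count.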
public List<ConsolidationDebugInfo> getConsolidationDebugInfo() {
List<ConsolidationDebugInfo> infos = new ArrayList<>();
RangeTree<PrimaryKey, AggregationChunk> tree = prefixRanges[aggregation.getKeys().size()];
for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tree.getSegments().entrySet()) {
PrimaryKey key = segmentEntry.getKey();
RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
int overlaps = segment.getSet().size() + segment.getClosingSet().size();
Set<AggregationChunk> segmentSet = segment.getSet();
Set<AggregationChunk> segmentClosingSet = segment.getClosingSet();
infos.add(new ConsolidationDebugInfo(key, segmentSet, segmentClosingSet, overlaps));
}
return infos;
}
public static class ConsolidationDebugInfo {
public final PrimaryKey key;
public final Set<AggregationChunk> segmentSet;
public final Set<AggregationChunk> segmentClosingSet;
public final int overlaps;
public ConsolidationDebugInfo(PrimaryKey key, Set<AggregationChunk> segmentSet,
Set<AggregationChunk> segmentClosingSet, int overlaps) {
this.key = key;
this.segmentSet = segmentSet;
this.segmentClosingSet = segmentClosingSet;
this.overlaps = overlaps;
}
}
// visibleForTesting
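// Conservative pruning test used when selecting chunks for a query: a chunk is kept only if
// its [min, max] primary-key range might intersect the queried key range.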
public static boolean chunkMightContainQueryValues(PrimaryKey minQueryKey, PrimaryKey maxQueryKey,
PrimaryKey minChunkKey, PrimaryKey maxChunkKey) {
return chunkMightContainQueryValues(minQueryKey.values(), maxQueryKey.values(),
minChunkKey.values(), maxChunkKey.values());
}
private Predicate<AggregationChunk> chunkMightContainQueryValuesPredicate(PrimaryKey minQueryKey,
PrimaryKey maxQueryKey) {
return chunk -> {
List