
io.datakernel.aggregation.AggregationState

Log-structured merge-tree table with fields representing aggregate functions, designed for OLAP workload.

/*
 * Copyright (C) 2015-2018 SoftIndex LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.datakernel.aggregation;

import io.datakernel.aggregation.AggregationPredicates.RangeScan;
import io.datakernel.aggregation.ot.AggregationDiff;
import io.datakernel.aggregation.ot.AggregationStructure;
import io.datakernel.ot.OTState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import static io.datakernel.aggregation.AggregationPredicates.toRangeScan;
import static io.datakernel.util.CollectionUtils.intersection;
import static io.datakernel.util.Preconditions.checkArgument;

/**
 * Represents aggregation metadata. Stores chunks in an index (represented by an array of {@link RangeTree}) for efficient search.
 * Provides methods for managing the index, querying it for chunks by key, and searching for chunks that are available for consolidation.
 */
public final class AggregationState implements OTState {
	private static final Logger logger = LoggerFactory.getLogger(AggregationState.class);

	private final AggregationStructure aggregation;

	private final Map<Object, AggregationChunk> chunks = new LinkedHashMap<>();
	private RangeTree<PrimaryKey, AggregationChunk>[] prefixRanges;

	private static final int EQUALS_QUERIES_THRESHOLD = 1_000;
	private static final Comparator<AggregationChunk> MIN_KEY_ASCENDING_COMPARATOR = new Comparator<AggregationChunk>() {
		@Override
		public int compare(AggregationChunk chunk1, AggregationChunk chunk2) {
			return chunk1.getMinPrimaryKey().compareTo(chunk2.getMinPrimaryKey());
		}
	};

	@SuppressWarnings("unchecked")
	AggregationState(AggregationStructure aggregation) {
		this.aggregation = aggregation;
		initIndex();
	}

	public Map<Object, AggregationChunk> getChunks() {
		return Collections.unmodifiableMap(chunks);
	}

	@Override
	public void apply(AggregationDiff commit) {
		for (AggregationChunk chunk : commit.getAddedChunks()) {
			addToIndex(chunk);
		}

		for (AggregationChunk chunk : commit.getRemovedChunks()) {
			removeFromIndex(chunk);
		}
	}

	public void addToIndex(AggregationChunk chunk) {
		for (int size = 0; size <= aggregation.getKeys().size(); size++) {
			RangeTree<PrimaryKey, AggregationChunk> index = prefixRanges[size];

			PrimaryKey lower = chunk.getMinPrimaryKey().prefix(size);
			PrimaryKey upper = chunk.getMaxPrimaryKey().prefix(size);
			index.put(lower, upper, chunk);
		}
		chunks.put(chunk.getChunkId(), chunk);
	}

	public void removeFromIndex(AggregationChunk chunk) {
		for (int size = 0; size <= aggregation.getKeys().size(); size++) {
			RangeTree<PrimaryKey, AggregationChunk> index = prefixRanges[size];

			PrimaryKey lower = chunk.getMinPrimaryKey().prefix(size);
			PrimaryKey upper = chunk.getMaxPrimaryKey().prefix(size);
			index.remove(lower, upper, chunk);
		}
		chunks.remove(chunk.getChunkId());
	}

	void initIndex() {
		this.prefixRanges = new RangeTree[aggregation.getKeys().size() + 1];
		for (int size = 0; size <= aggregation.getKeys().size(); size++) {
			this.prefixRanges[size] = RangeTree.create();
		}
	}

	@Override
	public void init() {
		initIndex();
		chunks.clear();
	}

	private static int getNumberOfOverlaps(RangeTree.Segment<AggregationChunk> segment) {
		return segment.getSet().size() + segment.getClosingSet().size();
	}

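	/**
	 * Returns all chunks whose primary key ranges overlap with at least one other chunk
	 * in the full-key index, i.e. candidates for consolidation.
	 */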
	public Set<AggregationChunk> findOverlappingChunks() {
		int minOverlaps = 2;
		Set<AggregationChunk> result = new HashSet<>();
		RangeTree<PrimaryKey, AggregationChunk> tree = prefixRanges[aggregation.getKeys().size()];
		for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tree.getSegments().entrySet()) {
			RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
			int overlaps = getNumberOfOverlaps(segment);
			if (overlaps >= minOverlaps) {
				result.addAll(segment.getSet());
				result.addAll(segment.getClosingSet());
			}
		}
		return result;
	}

	public List<AggregationChunk> findChunksGroupWithMostOverlaps() {
		return findChunksGroupWithMostOverlaps(prefixRanges[aggregation.getKeys().size()]);
	}

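	/**
	 * Scans the segments of the given index and returns the chunks of the segment covered by
	 * the largest number of chunks (at least two), or an empty list if no chunks overlap.
	 */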
	private static List<AggregationChunk> findChunksGroupWithMostOverlaps(RangeTree<PrimaryKey, AggregationChunk> tree) {
		int maxOverlaps = 2;
		List<AggregationChunk> result = new ArrayList<>();
		for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tree.getSegments().entrySet()) {
			RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
			int overlaps = getNumberOfOverlaps(segment);
			if (overlaps >= maxOverlaps) {
				maxOverlaps = overlaps;
				result.clear();
				result.addAll(segment.getSet());
				result.addAll(segment.getClosingSet());
			}
		}
		return result;
	}

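	/**
	 * Walks the partitions in key order and returns the first group of at least two chunks picked
	 * by either the "min key" or the "size fix" strategy, together with that partition's index.
	 */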
	private static PickedChunks findChunksWithMinKeyOrSizeFixStrategy(SortedMap<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> partitioningKeyToTree,
	                                                                  int maxChunks, int optimalChunkSize) {
		int minChunks = 2;
		for (Map.Entry<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> entry : partitioningKeyToTree.entrySet()) {
			ChunksAndStrategy chunksAndStrategy = findChunksWithMinKeyOrSizeFixStrategy(entry.getValue(), maxChunks, optimalChunkSize);
			if (chunksAndStrategy.chunks.size() >= minChunks)
				return new PickedChunks(chunksAndStrategy.strategy, entry.getValue(), chunksAndStrategy.chunks);
		}
		return new PickedChunks(PickingStrategy.MIN_KEY, null, Collections.emptyList());
	}

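	/**
	 * Picks chunks within a single partition: if some segment is covered by two or more chunks,
	 * those chunks are returned under the MIN_KEY strategy. Otherwise, starting from the first
	 * segment holding a single chunk whose size differs from optimalChunkSize, chunks from the
	 * following segments are collected (until about maxChunks) under the SIZE_FIX strategy;
	 * a lone chunk that is not larger than optimalChunkSize is left alone.
	 */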
	private static ChunksAndStrategy findChunksWithMinKeyOrSizeFixStrategy(RangeTree<PrimaryKey, AggregationChunk> tree,
	                                                                       int maxChunks, int optimalChunkSize) {
		int minOverlaps = 2;
		List<AggregationChunk> result = new ArrayList<>();
		SortedMap<PrimaryKey, RangeTree.Segment<AggregationChunk>> tailMap = null;
		for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tree.getSegments().entrySet()) {
			RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
			int overlaps = getNumberOfOverlaps(segment);

			// "min key" strategy
			if (overlaps >= minOverlaps) {
				result.addAll(segment.getSet());
				result.addAll(segment.getClosingSet());
				return new ChunksAndStrategy(PickingStrategy.MIN_KEY, result);
			}

			List<AggregationChunk> segmentChunks = new ArrayList<>();
			segmentChunks.addAll(segment.getSet());
			segmentChunks.addAll(segment.getClosingSet());

			// "size fix" strategy
			if (overlaps == 1 && segmentChunks.get(0).getCount() != optimalChunkSize) {
				tailMap = tree.getSegments().tailMap(segmentEntry.getKey());
				break;
			}
		}

		if (tailMap == null)
			return new ChunksAndStrategy(PickingStrategy.SIZE_FIX, Collections.emptyList());

		Set<AggregationChunk> chunks = new HashSet<>();
		for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tailMap.entrySet()) {
			if (chunks.size() >= maxChunks)
				break;

			RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
			chunks.addAll(segment.getSet());
			chunks.addAll(segment.getClosingSet());
		}
		result.addAll(chunks);

		if (result.size() == 1) {
			if (result.get(0).getCount() > optimalChunkSize)
				return new ChunksAndStrategy(PickingStrategy.SIZE_FIX, result);
			else
				return new ChunksAndStrategy(PickingStrategy.SIZE_FIX, Collections.emptyList());
		}

		return new ChunksAndStrategy(PickingStrategy.SIZE_FIX, result);
	}

	private enum PickingStrategy {
		PARTITIONING,
		HOT_SEGMENT,
		MIN_KEY,
		SIZE_FIX
	}

	private static class PickedChunks {
		private final PickingStrategy strategy;
		private final RangeTree<PrimaryKey, AggregationChunk> partitionTree;
		private final List<AggregationChunk> chunks;

		public PickedChunks(PickingStrategy strategy, RangeTree<PrimaryKey, AggregationChunk> partitionTree,
		                    List<AggregationChunk> chunks) {
			this.strategy = strategy;
			this.partitionTree = partitionTree;
			this.chunks = chunks;
		}
	}

	private static class ChunksAndStrategy {
		private final PickingStrategy strategy;
		private final List<AggregationChunk> chunks;

		public ChunksAndStrategy(PickingStrategy strategy, List<AggregationChunk> chunks) {
			this.strategy = strategy;
			this.chunks = chunks;
		}
	}

	// visibleForTest
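	/**
	 * Groups all chunks by the prefix of their primary key of the given length.
	 * Returns {@code null} if any chunk spans more than one partition, i.e. the data is not partitioned yet.
	 */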
	SortedMap<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> groupByPartition(int partitioningKeyLength) {
		SortedMap<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> partitioningKeyToTree = new TreeMap<>();

		Set<AggregationChunk> allChunks = prefixRanges[0].getAll();
		for (AggregationChunk chunk : allChunks) {
			PrimaryKey minKeyPrefix = chunk.getMinPrimaryKey().prefix(partitioningKeyLength);
			PrimaryKey maxKeyPrefix = chunk.getMaxPrimaryKey().prefix(partitioningKeyLength);

			if (!minKeyPrefix.equals(maxKeyPrefix))
				return null; // not partitioned

			RangeTree<PrimaryKey, AggregationChunk> tree = partitioningKeyToTree.get(minKeyPrefix);
			if (tree == null) {
				tree = RangeTree.create();
				partitioningKeyToTree.put(minKeyPrefix, tree);
			}

			tree.put(chunk.getMinPrimaryKey(), chunk.getMaxPrimaryKey(), chunk);
		}

		return partitioningKeyToTree;
	}

	private List<AggregationChunk> findChunksForPartitioning(int partitioningKeyLength, int maxChunks) {
		List<AggregationChunk> chunksForPartitioning = new ArrayList<>();
		List<AggregationChunk> allChunks = new ArrayList<>(prefixRanges[0].getAll());
		Collections.sort(allChunks, MIN_KEY_ASCENDING_COMPARATOR);

		for (AggregationChunk chunk : allChunks) {
			if (chunksForPartitioning.size() == maxChunks)
				break;

			PrimaryKey minKeyPrefix = chunk.getMinPrimaryKey().prefix(partitioningKeyLength);
			PrimaryKey maxKeyPrefix = chunk.getMaxPrimaryKey().prefix(partitioningKeyLength);

			if (!minKeyPrefix.equals(maxKeyPrefix))
				chunksForPartitioning.add(chunk);
		}

		return chunksForPartitioning;
	}

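	/**
	 * Picks chunks for consolidation. If some chunks span more than one partition, those chunks
	 * are returned for repartitioning; otherwise chunks are selected per partition using the
	 * "min key" / "size fix" strategies and then trimmed or expanded to the requested limit.
	 */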
	public List<AggregationChunk> findChunksForConsolidationMinKey(int maxChunks, int optimalChunkSize) {
		int partitioningKeyLength = aggregation.getPartitioningKey().size();
		SortedMap<PrimaryKey, RangeTree<PrimaryKey, AggregationChunk>> partitioningKeyToTree = groupByPartition(partitioningKeyLength);
		if (partitioningKeyToTree == null) { // not partitioned
			List<AggregationChunk> chunks = findChunksForPartitioning(partitioningKeyLength, maxChunks);
			logChunksAndStrategy(chunks, PickingStrategy.PARTITIONING);
			return chunks; // launch partitioning
		}
		PickedChunks pickedChunks = findChunksWithMinKeyOrSizeFixStrategy(partitioningKeyToTree, maxChunks, optimalChunkSize);
		return processSelection(pickedChunks.chunks, maxChunks, pickedChunks.partitionTree, pickedChunks.strategy);
	}

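	/**
	 * Picks chunks for consolidation from the "hottest" segment of the full-key index,
	 * that is, the segment covered by the largest number of overlapping chunks.
	 */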
	public List<AggregationChunk> findChunksForConsolidationHotSegment(int maxChunks) {
		RangeTree<PrimaryKey, AggregationChunk> tree = prefixRanges[aggregation.getKeys().size()];
		List<AggregationChunk> chunks = findChunksGroupWithMostOverlaps(tree);
		return processSelection(chunks, maxChunks, tree, PickingStrategy.HOT_SEGMENT);
	}

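	/**
	 * Post-processes a picked group of chunks: trims it when it exceeds maxChunks and, for the
	 * MIN_KEY and HOT_SEGMENT strategies, expands it with all chunks that fall into the covered
	 * key range while the limit permits.
	 */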
	private static List<AggregationChunk> processSelection(List<AggregationChunk> chunks, int maxChunks,
	                                                       RangeTree<PrimaryKey, AggregationChunk> partitionTree,
	                                                       PickingStrategy strategy) {
		if (chunks.isEmpty() || chunks.size() == maxChunks) {
			logChunksAndStrategy(chunks, strategy);
			return chunks;
		}

		if (chunks.size() > maxChunks) {
			List<AggregationChunk> trimmedChunks = trimChunks(chunks, maxChunks);
			logChunksAndStrategy(trimmedChunks, strategy);
			return trimmedChunks;
		}

		if (strategy == PickingStrategy.SIZE_FIX) {
			logChunksAndStrategy(chunks, strategy);
			return chunks;
		}

		List<AggregationChunk> expandedChunks = expandRange(partitionTree, chunks, maxChunks);

		if (expandedChunks.size() > maxChunks) {
			List<AggregationChunk> trimmedChunks = trimChunks(expandedChunks, maxChunks);
			logChunksAndStrategy(trimmedChunks, strategy);
			return trimmedChunks;
		}

		logChunksAndStrategy(expandedChunks, strategy);
		return expandedChunks;
	}

	private static void logChunksAndStrategy(Collection<AggregationChunk> chunks, PickingStrategy strategy) {
		if (logger.isInfoEnabled()) {
			String chunkIds = chunks.stream()
					.map(AggregationChunk::getChunkId)
					.map(Object::toString)
					.collect(Collectors.joining(",", "[", "]"));
			logger.info("Chunks for consolidation {}: {}. Strategy: {}", chunks.size(), chunkIds, strategy);
		}
	}

	private static List<AggregationChunk> trimChunks(List<AggregationChunk> chunks, int maxChunks) {
		Collections.sort(chunks, MIN_KEY_ASCENDING_COMPARATOR);
		return chunks.subList(0, maxChunks);
	}

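	/**
	 * Adds to the set all chunks of the tree that intersect the combined key range of the chunks
	 * already in the set; returns {@code true} if the set grew.
	 */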
	private static boolean expandRange(RangeTree<PrimaryKey, AggregationChunk> tree, Set<AggregationChunk> chunks) {
		PrimaryKey minKey = null;
		PrimaryKey maxKey = null;

		for (AggregationChunk chunk : chunks) {
			PrimaryKey chunkMinKey = chunk.getMinPrimaryKey();
			PrimaryKey chunkMaxKey = chunk.getMaxPrimaryKey();

			if (minKey == null) {
				minKey = chunkMinKey;
				maxKey = chunkMaxKey;
				continue;
			}

			if (chunkMinKey.compareTo(minKey) < 0)
				minKey = chunkMinKey;

			if (chunkMaxKey.compareTo(maxKey) > 0)
				maxKey = chunkMaxKey;
		}

		Set<AggregationChunk> chunksForRange = tree.getRange(minKey, maxKey);
		return chunks.addAll(chunksForRange);
	}

	private static void expandRange(RangeTree<PrimaryKey, AggregationChunk> tree, Set<AggregationChunk> chunks, int maxChunks) {
		boolean expand = chunks.size() < maxChunks;

		while (expand) {
			boolean expanded = expandRange(tree, chunks);
			expand = expanded && chunks.size() < maxChunks;
		}
	}

	private static List<AggregationChunk> expandRange(RangeTree<PrimaryKey, AggregationChunk> tree,
	                                                  List<AggregationChunk> chunks, int maxChunks) {
		Set<AggregationChunk> chunkSet = new HashSet<>(chunks);
		expandRange(tree, chunkSet, maxChunks);
		return new ArrayList<>(chunkSet);
	}

	public List<ConsolidationDebugInfo> getConsolidationDebugInfo() {
		List<ConsolidationDebugInfo> infos = new ArrayList<>();
		RangeTree<PrimaryKey, AggregationChunk> tree = prefixRanges[aggregation.getKeys().size()];

		for (Map.Entry<PrimaryKey, RangeTree.Segment<AggregationChunk>> segmentEntry : tree.getSegments().entrySet()) {
			PrimaryKey key = segmentEntry.getKey();
			RangeTree.Segment<AggregationChunk> segment = segmentEntry.getValue();
			int overlaps = segment.getSet().size() + segment.getClosingSet().size();
			Set<AggregationChunk> segmentSet = segment.getSet();
			Set<AggregationChunk> segmentClosingSet = segment.getClosingSet();
			infos.add(new ConsolidationDebugInfo(key, segmentSet, segmentClosingSet, overlaps));
		}

		return infos;
	}

	public static class ConsolidationDebugInfo {
		public final PrimaryKey key;
		public final Set<AggregationChunk> segmentSet;
		public final Set<AggregationChunk> segmentClosingSet;
		public final int overlaps;

		public ConsolidationDebugInfo(PrimaryKey key, Set<AggregationChunk> segmentSet,
		                              Set<AggregationChunk> segmentClosingSet, int overlaps) {
			this.key = key;
			this.segmentSet = segmentSet;
			this.segmentClosingSet = segmentClosingSet;
			this.overlaps = overlaps;
		}
	}

	// visibleForTesting
	public static boolean chunkMightContainQueryValues(PrimaryKey minQueryKey, PrimaryKey maxQueryKey,
	                                                   PrimaryKey minChunkKey, PrimaryKey maxChunkKey) {
		return chunkMightContainQueryValues(minQueryKey.values(), maxQueryKey.values(),
				minChunkKey.values(), maxChunkKey.values());
	}

	private Predicate<AggregationChunk> chunkMightContainQueryValuesPredicate(PrimaryKey minQueryKey,
	                                                                          PrimaryKey maxQueryKey) {
		return chunk -> {
			List<Object> queryMinValues = minQueryKey.values();
			List<Object> queryMaxValues = maxQueryKey.values();
			List<Object> chunkMinValues = chunk.getMinPrimaryKey().values();
			List<Object> chunkMaxValues = chunk.getMaxPrimaryKey().values();

			return chunkMightContainQueryValues(queryMinValues, queryMaxValues, chunkMinValues, chunkMaxValues);
		};
	}

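	/**
	 * Checks, dimension by dimension, whether a chunk's key range may intersect the query range:
	 * while the chunk is fixed to a single value in a dimension, that value must lie inside the
	 * query range; at the first dimension where the chunk spans several values, a plain range
	 * intersection test decides the result.
	 */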
	@SuppressWarnings("unchecked")
	private static boolean chunkMightContainQueryValues(List<Object> queryMinValues, List<Object> queryMaxValues,
	                                                    List<Object> chunkMinValues, List<Object> chunkMaxValues) {
		checkArgument(queryMinValues.size() == queryMaxValues.size());
		checkArgument(chunkMinValues.size() == chunkMaxValues.size());

		for (int i = 0; i < queryMinValues.size(); ++i) {
			Comparable<Object> queryMinValue = (Comparable<Object>) queryMinValues.get(i);
			Comparable<Object> queryMaxValue = (Comparable<Object>) queryMaxValues.get(i);
			Comparable<Object> chunkMinValue = (Comparable<Object>) chunkMinValues.get(i);
			Comparable<Object> chunkMaxValue = (Comparable<Object>) chunkMaxValues.get(i);

			if (chunkMinValue.compareTo(chunkMaxValue) == 0) {
				if (!(queryMinValue.compareTo(chunkMinValue) <= 0 && queryMaxValue.compareTo(chunkMaxValue) >= 0)) {
					return false;
				}
			} else {
				return queryMinValue.compareTo(chunkMaxValue) <= 0 && queryMaxValue.compareTo(chunkMinValue) >= 0;
			}
		}

		return true;
	}

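	/**
	 * Returns the chunks whose primary key range may match the given predicate and which contain
	 * at least one of the requested fields.
	 */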
	@SuppressWarnings("unchecked")
	public List<AggregationChunk> findChunks(AggregationPredicate predicate, List<String> fields) {
		Set<String> requestedFields = new HashSet<>(fields);

		RangeScan rangeScan = toRangeScan(predicate, aggregation.getKeys(), aggregation.getKeyTypes());

		List<AggregationChunk> chunks = new ArrayList<>();
		for (AggregationChunk chunk : rangeQuery(rangeScan.getFrom(), rangeScan.getTo())) {
			if (intersection(new HashSet<>(chunk.getMeasures()), requestedFields).isEmpty())
				continue;

			chunks.add(chunk);
		}

		return chunks;
	}

	private List<AggregationChunk> rangeQuery(PrimaryKey minPrimaryKey, PrimaryKey maxPrimaryKey) {
		checkArgument(minPrimaryKey.size() == maxPrimaryKey.size());
		int size = minPrimaryKey.size();
		RangeTree<PrimaryKey, AggregationChunk> index = prefixRanges[size];
		return new ArrayList<>(index.getRange(minPrimaryKey, maxPrimaryKey));
	}

	@Override
	public String toString() {
		return "Aggregation{keys=" + aggregation.getKeys() + ", fields=" + aggregation.getMeasures() + '}';
	}
}
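
For orientation, a minimal usage sketch follows. It is not part of the artifact: how the AggregationState instance is obtained is left out (the constructor is package-private, so in practice it comes from the surrounding aggregation/OT machinery), the ConsolidationPlanner class is hypothetical, and the maxChunks and optimalChunkSize values are illustrative assumptions rather than library defaults. Only methods shown in the source above are called.

import io.datakernel.aggregation.AggregationChunk;
import io.datakernel.aggregation.AggregationState;

import java.util.List;

public final class ConsolidationPlanner {
	/**
	 * Picks chunks to consolidate: prefer the "hot segment" strategy and fall back to the
	 * "min key" / "size fix" selection when no segment overlaps. Thresholds are assumptions.
	 */
	public static List<AggregationChunk> planConsolidation(AggregationState state) {
		int maxChunks = 100;               // assumed upper bound on chunks per consolidation
		int optimalChunkSize = 1_000_000;  // assumed target number of records per chunk

		List<AggregationChunk> hotSegment = state.findChunksForConsolidationHotSegment(maxChunks);
		if (!hotSegment.isEmpty()) {
			return hotSegment;
		}
		return state.findChunksForConsolidationMinKey(maxChunks, optimalChunkSize);
	}
}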