// org.apache.flink.runtime.state.heap.KeyGroupPartitionedPriorityQueue (source listing: Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.heap;
import org.apache.flink.runtime.state.InternalPriorityQueue;
import org.apache.flink.runtime.state.KeyExtractorFunction;
import org.apache.flink.runtime.state.KeyGroupRange;
import org.apache.flink.runtime.state.KeyGroupRangeAssignment;
import org.apache.flink.runtime.state.KeyGroupedInternalPriorityQueue;
import org.apache.flink.runtime.state.PriorityComparator;
import org.apache.flink.util.CloseableIterator;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.IOUtils;

import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.util.Collection;
import java.util.HashSet;
import java.util.NoSuchElementException;
import java.util.Set;
/**
* This implementation of {@link InternalPriorityQueue} is internally partitioned into sub-queues per key-group and
* essentially works as a heap-of-heaps. Instances will have set semantics for elements if the sub-queues have set
* semantics.
*
* @param the type of elements in the queue.
* @param type type of sub-queue used for each key-group partition.
*/
public class KeyGroupPartitionedPriorityQueue & HeapPriorityQueueElement>
implements InternalPriorityQueue, KeyGroupedInternalPriorityQueue {
/** A heap of heap sets. Each sub-heap represents the partition for a key-group.*/
@Nonnull
private final HeapPriorityQueue heapOfKeyGroupedHeaps;
/** All elements from keyGroupHeap, indexed by their key-group id, relative to firstKeyGroup. */
@Nonnull
private final PQ[] keyGroupedHeaps;
/** Function to extract the key from contained elements. */
@Nonnull
private final KeyExtractorFunction keyExtractor;
/** The total number of key-groups (in the job). */
@Nonnegative
private final int totalKeyGroups;
/** The smallest key-group id with a subpartition managed by this ordered set. */
@Nonnegative
private final int firstKeyGroup;
@SuppressWarnings("unchecked")
public KeyGroupPartitionedPriorityQueue(
@Nonnull KeyExtractorFunction keyExtractor,
@Nonnull PriorityComparator elementPriorityComparator,
@Nonnull PartitionQueueSetFactory orderedCacheFactory,
@Nonnull KeyGroupRange keyGroupRange,
@Nonnegative int totalKeyGroups) {
this.keyExtractor = keyExtractor;
this.totalKeyGroups = totalKeyGroups;
this.firstKeyGroup = keyGroupRange.getStartKeyGroup();
this.keyGroupedHeaps = (PQ[]) new InternalPriorityQueue[keyGroupRange.getNumberOfKeyGroups()];
this.heapOfKeyGroupedHeaps = new HeapPriorityQueue<>(
new InternalPriorityQueueComparator<>(elementPriorityComparator),
keyGroupRange.getNumberOfKeyGroups());
for (int i = 0; i < keyGroupedHeaps.length; i++) {
final PQ keyGroupSubHeap =
orderedCacheFactory.create(firstKeyGroup + i, totalKeyGroups, keyExtractor, elementPriorityComparator);
keyGroupedHeaps[i] = keyGroupSubHeap;
heapOfKeyGroupedHeaps.add(keyGroupSubHeap);
}
}
@Nullable
@Override
public T poll() {
final PQ headList = heapOfKeyGroupedHeaps.peek();
final T head = headList.poll();
heapOfKeyGroupedHeaps.adjustModifiedElement(headList);
return head;
}
@Nullable
@Override
public T peek() {
return heapOfKeyGroupedHeaps.peek().peek();
}
@Override
public boolean add(@Nonnull T toAdd) {
final PQ list = getKeyGroupSubHeapForElement(toAdd);
// the branch checks if the head element has (potentially) changed.
if (list.add(toAdd)) {
heapOfKeyGroupedHeaps.adjustModifiedElement(list);
// could we have a new head?
return toAdd.equals(peek());
} else {
// head unchanged
return false;
}
}
@Override
public boolean remove(@Nonnull T toRemove) {
final PQ list = getKeyGroupSubHeapForElement(toRemove);
final T oldHead = peek();
// the branch checks if the head element has (potentially) changed.
if (list.remove(toRemove)) {
heapOfKeyGroupedHeaps.adjustModifiedElement(list);
// could we have a new head?
return toRemove.equals(oldHead);
} else {
// head unchanged
return false;
}
}
@Override
public boolean isEmpty() {
return peek() == null;
}
@Override
public int size() {
int sizeSum = 0;
for (PQ list : keyGroupedHeaps) {
sizeSum += list.size();
}
return sizeSum;
}
@Override
public void addAll(@Nullable Collection toAdd) {
if (toAdd == null) {
return;
}
// TODO consider bulk loading the partitions and "heapify" keyGroupHeap once after all elements are inserted.
for (T element : toAdd) {
add(element);
}
}
@Nonnull
@Override
public CloseableIterator iterator() {
return new KeyGroupConcatenationIterator<>(keyGroupedHeaps);
}
private PQ getKeyGroupSubHeapForElement(T element) {
return keyGroupedHeaps[computeKeyGroupIndex(element)];
}
private int computeKeyGroupIndex(T element) {
final Object extractKeyFromElement = keyExtractor.extractKeyFromElement(element);
final int keyGroupId = KeyGroupRangeAssignment.assignToKeyGroup(extractKeyFromElement, totalKeyGroups);
return globalKeyGroupToLocalIndex(keyGroupId);
}
private int globalKeyGroupToLocalIndex(int keyGroupId) {
return keyGroupId - firstKeyGroup;
}
@Nonnull
@Override
public Set getSubsetForKeyGroup(int keyGroupId) {
HashSet result = new HashSet<>();
PQ partitionQueue = keyGroupedHeaps[globalKeyGroupToLocalIndex(keyGroupId)];
try (CloseableIterator iterator = partitionQueue.iterator()) {
while (iterator.hasNext()) {
result.add(iterator.next());
}
} catch (Exception e) {
throw new FlinkRuntimeException("Exception while iterating key group.", e);
}
return result;
}
/**
* Iterator for {@link KeyGroupPartitionedPriorityQueue}. This iterator is not guaranteeing any order of elements.
* Using code must {@link #close()} after usage.
*
* @param the type of iterated elements.
*/
private static final class KeyGroupConcatenationIterator<
T, PQS extends InternalPriorityQueue & HeapPriorityQueueElement>
implements CloseableIterator {
/** Array with the subpartitions that we iterate. No null values in the array. */
@Nonnull
private final PQS[] keyGroupLists;
/** The subpartition the is currently iterated. */
@Nonnegative
private int index;
/** The iterator of the current subpartition. */
@Nonnull
private CloseableIterator current;
private KeyGroupConcatenationIterator(@Nonnull PQS[] keyGroupLists) {
this.keyGroupLists = keyGroupLists;
this.index = 0;
this.current = CloseableIterator.empty();
}
@Override
public boolean hasNext() {
boolean currentHasNext = current.hasNext();
// find the iterator of the next partition that has elements.
while (!currentHasNext && index < keyGroupLists.length) {
IOUtils.closeQuietly(current);
current = keyGroupLists[index++].iterator();
currentHasNext = current.hasNext();
}
return currentHasNext;
}
@Override
public T next() {
return current.next();
}
@Override
public void close() throws Exception {
current.close();
}
}
/**
* Comparator that compares {@link InternalPriorityQueue} objects by their head element. Must handle null results
* from {@link #peek()}.
*
* @param type of the elements in the compared queues.
* @param type of queue.
*/
private static final class InternalPriorityQueueComparator>
implements PriorityComparator {
/** Comparator for the queue elements, so we can compare their heads. */
@Nonnull
private final PriorityComparator elementPriorityComparator;
InternalPriorityQueueComparator(@Nonnull PriorityComparator elementPriorityComparator) {
this.elementPriorityComparator = elementPriorityComparator;
}
@Override
public int comparePriority(Q o1, Q o2) {
final T left = o1.peek();
final T right = o2.peek();
if (left == null) {
return (right == null ? 0 : 1);
} else {
return (right == null ? -1 : elementPriorityComparator.comparePriority(left, right));
}
}
}
/**
* Factory that produces the sub-queues that represent the partitions of a {@link KeyGroupPartitionedPriorityQueue}.
*
* @param type of the elements in the queue set.
* @param type of the priority queue. Must have set semantics and {@link HeapPriorityQueueElement}.
*/
public interface PartitionQueueSetFactory & HeapPriorityQueueElement> {
/**
* Creates a new queue for a given key-group partition.
*
* @param keyGroupId the key-group of the elements managed by the produced queue.
* @param numKeyGroups the total number of key-groups in the job.
* @param elementPriorityComparator the comparator that determines the order of managed elements by priority.
* @return a new queue for the given key-group.
*/
@Nonnull
PQS create(
@Nonnegative int keyGroupId,
@Nonnegative int numKeyGroups,
@Nonnull KeyExtractorFunction keyExtractorFunction,
@Nonnull PriorityComparator elementPriorityComparator);
}
}