org.apache.flink.runtime.state.KeyGroupPartitioner Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.util.Preconditions;
import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
/**
* Class that contains the base algorithm for partitioning data into key-groups. This algorithm
* currently works with two array (input, output) for optimal algorithmic complexity. Notice that
* this could also be implemented over a single array, using some cuckoo-hashing-style element
* replacement. This would have worse algorithmic complexity but better space efficiency. We
* currently prefer the trade-off in favor of better algorithmic complexity.
*
* @param type of the partitioned elements.
*/
public class KeyGroupPartitioner {
/**
* The input data for the partitioning. All elements to consider must be densely in the index
* interval [0, {@link #numberOfElements}[, without null values.
*/
@Nonnull private final T[] partitioningSource;
/**
* The output array for the partitioning. The size must be {@link #numberOfElements} (or
* bigger).
*/
@Nonnull private final T[] partitioningDestination;
/** Total number of input elements. */
@Nonnegative private final int numberOfElements;
/** The total number of key-groups in the job. */
@Nonnegative private final int totalKeyGroups;
/**
* This bookkeeping array is used to count the elements in each key-group. In a second step, it
* is transformed into a histogram by accumulation.
*/
@Nonnull private final int[] counterHistogram;
/**
* This is a helper array that caches the key-group for each element, so we do not have to
* compute them twice.
*/
@Nonnull private final int[] elementKeyGroups;
/** Cached value of keyGroupRange#firstKeyGroup. */
@Nonnegative private final int firstKeyGroup;
/** Function to extract the key from a given element. */
@Nonnull private final KeyExtractorFunction keyExtractorFunction;
/** Function to write an element to a {@link DataOutputView}. */
@Nonnull private final ElementWriterFunction elementWriterFunction;
/** Cached result. */
@Nullable private PartitioningResult computedResult;
/**
* Creates a new {@link KeyGroupPartitioner}.
*
* @param partitioningSource the input for the partitioning. All elements must be densely packed
* in the index interval [0, {@link #numberOfElements}[, without null values.
* @param numberOfElements the number of elements to consider from the input, starting at input
* index 0.
* @param partitioningDestination the output of the partitioning. Must have capacity of at least
* numberOfElements.
* @param keyGroupRange the key-group range of the data that will be partitioned by this
* instance.
* @param totalKeyGroups the total number of key groups in the job.
* @param keyExtractorFunction this function extracts the partition key from an element.
*/
public KeyGroupPartitioner(
@Nonnull T[] partitioningSource,
@Nonnegative int numberOfElements,
@Nonnull T[] partitioningDestination,
@Nonnull KeyGroupRange keyGroupRange,
@Nonnegative int totalKeyGroups,
@Nonnull KeyExtractorFunction keyExtractorFunction,
@Nonnull ElementWriterFunction elementWriterFunction) {
Preconditions.checkState(partitioningSource != partitioningDestination);
Preconditions.checkState(partitioningSource.length >= numberOfElements);
Preconditions.checkState(partitioningDestination.length >= numberOfElements);
this.partitioningSource = partitioningSource;
this.partitioningDestination = partitioningDestination;
this.numberOfElements = numberOfElements;
this.totalKeyGroups = totalKeyGroups;
this.keyExtractorFunction = keyExtractorFunction;
this.elementWriterFunction = elementWriterFunction;
this.firstKeyGroup = keyGroupRange.getStartKeyGroup();
this.elementKeyGroups = new int[numberOfElements];
this.counterHistogram = new int[keyGroupRange.getNumberOfKeyGroups()];
this.computedResult = null;
}
/**
* Partitions the data into key-groups and returns the result as a {@link PartitioningResult}.
*/
public PartitioningResult partitionByKeyGroup() {
if (computedResult == null) {
reportAllElementKeyGroups();
int outputNumberOfElements = buildHistogramByAccumulatingCounts();
executePartitioning(outputNumberOfElements);
}
return computedResult;
}
/** This method iterates over the input data and reports the key-group for each element. */
protected void reportAllElementKeyGroups() {
Preconditions.checkState(partitioningSource.length >= numberOfElements);
for (int i = 0; i < numberOfElements; ++i) {
int keyGroup =
KeyGroupRangeAssignment.assignToKeyGroup(
keyExtractorFunction.extractKeyFromElement(partitioningSource[i]),
totalKeyGroups);
reportKeyGroupOfElementAtIndex(i, keyGroup);
}
}
/**
* This method reports in the bookkeeping data that the element at the given index belongs to
* the given key-group.
*/
protected void reportKeyGroupOfElementAtIndex(int index, int keyGroup) {
final int keyGroupIndex = keyGroup - firstKeyGroup;
elementKeyGroups[index] = keyGroupIndex;
++counterHistogram[keyGroupIndex];
}
/**
* This method creates a histogram from the counts per key-group in {@link #counterHistogram}.
*/
private int buildHistogramByAccumulatingCounts() {
int sum = 0;
for (int i = 0; i < counterHistogram.length; ++i) {
int currentSlotValue = counterHistogram[i];
counterHistogram[i] = sum;
sum += currentSlotValue;
}
return sum;
}
private void executePartitioning(int outputNumberOfElements) {
// We repartition the entries by their pre-computed key-groups, using the histogram values
// as write indexes
for (int inIdx = 0; inIdx < outputNumberOfElements; ++inIdx) {
int effectiveKgIdx = elementKeyGroups[inIdx];
int outIdx = counterHistogram[effectiveKgIdx]++;
partitioningDestination[outIdx] = partitioningSource[inIdx];
}
this.computedResult =
new PartitioningResultImpl<>(
elementWriterFunction,
firstKeyGroup,
counterHistogram,
partitioningDestination);
}
/** This represents the result of key-group partitioning. */
public interface PartitioningResult extends StateSnapshot.StateKeyGroupWriter {
Iterator iterator(int keyGroupId);
}
/** The data in {@link * #partitionedElements} is partitioned w.r.t. key group range. */
private static class PartitioningResultImpl implements PartitioningResult {
/** Function to write one element to a {@link DataOutputView}. */
@Nonnull private final ElementWriterFunction elementWriterFunction;
/**
* The exclusive-end-offsets for all key-groups of the covered range for the partitioning.
* Exclusive-end-offset for key-group n is under keyGroupOffsets[n - firstKeyGroup].
*/
@Nonnull private final int[] keyGroupOffsets;
/**
* Array with elements that are partitioned w.r.t. the covered key-group range. The start
* offset for each key-group is in {@link #keyGroupOffsets}.
*/
@Nonnull private final T[] partitionedElements;
/** The first key-group of the range covered in the partitioning. */
@Nonnegative private final int firstKeyGroup;
PartitioningResultImpl(
@Nonnull ElementWriterFunction elementWriterFunction,
@Nonnegative int firstKeyGroup,
@Nonnull int[] keyGroupEndOffsets,
@Nonnull T[] partitionedElements) {
this.elementWriterFunction = elementWriterFunction;
this.firstKeyGroup = firstKeyGroup;
this.keyGroupOffsets = keyGroupEndOffsets;
this.partitionedElements = partitionedElements;
}
@Nonnegative
private int getKeyGroupStartOffsetInclusive(int keyGroup) {
int idx = keyGroup - firstKeyGroup - 1;
return idx < 0 ? 0 : keyGroupOffsets[idx];
}
@Nonnegative
private int getKeyGroupEndOffsetExclusive(int keyGroup) {
return keyGroupOffsets[keyGroup - firstKeyGroup];
}
@Override
public void writeStateInKeyGroup(@Nonnull DataOutputView dov, int keyGroupId)
throws IOException {
int startOffset = getKeyGroupStartOffsetInclusive(keyGroupId);
int endOffset = getKeyGroupEndOffsetExclusive(keyGroupId);
// write number of mappings in key-group
dov.writeInt(endOffset - startOffset);
// write mappings
for (int i = startOffset; i < endOffset; ++i) {
elementWriterFunction.writeElement(partitionedElements[i], dov);
}
}
@Override
public Iterator iterator(int keyGroupId) {
int startOffset = getKeyGroupStartOffsetInclusive(keyGroupId);
int endOffset = getKeyGroupEndOffsetExclusive(keyGroupId);
return Arrays.stream(partitionedElements, startOffset, endOffset).iterator();
}
}
public static StateSnapshotKeyGroupReader createKeyGroupPartitionReader(
@Nonnull ElementReaderFunction readerFunction,
@Nonnull KeyGroupElementsConsumer elementConsumer) {
return new PartitioningResultKeyGroupReader<>(readerFunction, elementConsumer);
}
/**
* General algorithm to read key-grouped state that was written from a {@link
* PartitioningResultImpl}.
*
* @param type of the elements to read.
*/
private static class PartitioningResultKeyGroupReader
implements StateSnapshotKeyGroupReader {
@Nonnull private final ElementReaderFunction readerFunction;
@Nonnull private final KeyGroupElementsConsumer elementConsumer;
public PartitioningResultKeyGroupReader(
@Nonnull ElementReaderFunction readerFunction,
@Nonnull KeyGroupElementsConsumer elementConsumer) {
this.readerFunction = readerFunction;
this.elementConsumer = elementConsumer;
}
@Override
public void readMappingsInKeyGroup(@Nonnull DataInputView in, @Nonnegative int keyGroupId)
throws IOException {
int numElements = in.readInt();
for (int i = 0; i < numElements; i++) {
T element = readerFunction.readElement(in);
elementConsumer.consume(element, keyGroupId);
}
}
}
/**
* This functional interface defines how one element is written to a {@link DataOutputView}.
*
* @param type of the written elements.
*/
@FunctionalInterface
public interface ElementWriterFunction {
/**
* This method defines how to write a single element to the output.
*
* @param element the element to be written.
* @param dov the output view to write the element.
* @throws IOException on write-related problems.
*/
void writeElement(@Nonnull T element, @Nonnull DataOutputView dov) throws IOException;
}
/**
* This functional interface defines how one element is read from a {@link DataInputView}.
*
* @param type of the read elements.
*/
@FunctionalInterface
public interface ElementReaderFunction {
@Nonnull
T readElement(@Nonnull DataInputView div) throws IOException;
}
/**
* Functional interface to consume elements from a key group.
*
* @param type of the consumed elements.
*/
@FunctionalInterface
public interface KeyGroupElementsConsumer {
void consume(@Nonnull T element, @Nonnegative int keyGroupId) throws IOException;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy