// org.apache.flink.runtime.state.KeyGroupRangeAssignment (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state;
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.util.MathUtils;
import org.apache.flink.util.Preconditions;
/**
 * Static helpers that map keys to key-groups, and key-groups to parallel operator indices.
 *
 * <p>This is a pure utility class: the private constructor always throws, so it cannot be
 * instantiated.
 */
public final class KeyGroupRangeAssignment {

    /**
     * The default lower bound for max parallelism if nothing was configured by the user. We have
     * this to allow users some degree of scale-up in case they forgot to configure maximum
     * parallelism explicitly.
     */
    public static final int DEFAULT_LOWER_BOUND_MAX_PARALLELISM = 1 << 7;

    /** The (inclusive) upper bound for max parallelism. */
    public static final int UPPER_BOUND_MAX_PARALLELISM =
            Transformation.UPPER_BOUND_MAX_PARALLELISM;

    private KeyGroupRangeAssignment() {
        throw new AssertionError();
    }

    /**
     * Assigns the given key to a parallel operator index.
     *
     * <p>The key is first mapped to a key-group via {@link #assignToKeyGroup(Object, int)}, and
     * that key-group is then mapped to an operator index via {@link
     * #computeOperatorIndexForKeyGroup(int, int, int)}.
     *
     * @param key the key to assign; must not be {@code null}
     * @param maxParallelism the maximum supported parallelism, aka the number of key-groups.
     * @param parallelism the current parallelism of the operator
     * @return the index of the parallel operator to which the given key should be routed.
     */
    public static int assignKeyToParallelOperator(Object key, int maxParallelism, int parallelism) {
        Preconditions.checkNotNull(key, "Assigned key must not be null!");
        return computeOperatorIndexForKeyGroup(
                maxParallelism, parallelism, assignToKeyGroup(key, maxParallelism));
    }

    /**
     * Assigns the given key to a key-group index, based on the key's {@link Object#hashCode()}.
     *
     * @param key the key to assign; must not be {@code null}
     * @param maxParallelism the maximum supported parallelism, aka the number of key-groups.
     * @return the key-group to which the given key is assigned
     */
    public static int assignToKeyGroup(Object key, int maxParallelism) {
        Preconditions.checkNotNull(key, "Assigned key must not be null!");
        return computeKeyGroupForKeyHash(key.hashCode(), maxParallelism);
    }

    /**
     * Assigns the given key hash to a key-group index.
     *
     * <p>The hash is scrambled with {@link MathUtils#murmurHash(int)} and then reduced modulo
     * {@code maxParallelism}. No validation is performed here; a {@code maxParallelism} of zero
     * results in an {@link ArithmeticException} from the modulo operation.
     *
     * @param keyHash the hash of the key to assign
     * @param maxParallelism the maximum supported parallelism, aka the number of key-groups.
     * @return the key-group to which the given key is assigned
     */
    public static int computeKeyGroupForKeyHash(int keyHash, int maxParallelism) {
        return MathUtils.murmurHash(keyHash) % maxParallelism;
    }

    /**
     * Computes the range of key-groups that are assigned to a given operator under the given
     * parallelism and maximum parallelism.
     *
     * <p>The range start is {@code ceil(operatorIndex * maxParallelism / parallelism)} and the
     * range end is {@code ceil((operatorIndex + 1) * maxParallelism / parallelism) - 1}, so the
     * start computed for {@code operatorIndex + 1} is always exactly one past the end computed
     * for {@code operatorIndex}.
     *
     * <p>The intermediate products are computed in {@code long} arithmetic so that they cannot
     * wrap around, even for an {@code operatorIndex} outside the expected range. For {@code 0 <=
     * operatorIndex < parallelism} both bounds are smaller than {@code maxParallelism} and
     * therefore always fit into an {@code int}.
     *
     * @param maxParallelism Maximal parallelism that the job was initially created with. Must
     *     pass {@link #checkParallelismPreconditions(int)}.
     * @param parallelism The current parallelism under which the job runs. Must pass {@link
     *     #checkParallelismPreconditions(int)} and be {@code <= maxParallelism}.
     * @param operatorIndex index of the parallel operator instance. Expected to satisfy {@code 0
     *     <= operatorIndex < parallelism}; this is not validated by this method.
     * @return the computed key-group range for the operator.
     */
    public static KeyGroupRange computeKeyGroupRangeForOperatorIndex(
            int maxParallelism, int parallelism, int operatorIndex) {

        checkParallelismPreconditions(parallelism);
        checkParallelismPreconditions(maxParallelism);

        Preconditions.checkArgument(
                maxParallelism >= parallelism,
                "Maximum parallelism must not be smaller than parallelism.");

        // Widen to long before multiplying: the products can exceed Integer.MAX_VALUE when
        // operatorIndex is large, and int arithmetic would silently wrap.
        final long start =
                ((long) operatorIndex * maxParallelism + parallelism - 1) / parallelism;
        final long end = (((long) operatorIndex + 1) * maxParallelism - 1) / parallelism;
        return new KeyGroupRange((int) start, (int) end);
    }

    /**
     * Computes the index of the operator to which a key-group belongs under the given parallelism
     * and maximum parallelism.
     *
     * <p>The result is {@code floor(keyGroupId * parallelism / maxParallelism)}. The product is
     * computed in {@code long} arithmetic so that it cannot wrap around for large arguments. For
     * {@code 0 <= keyGroupId < maxParallelism} the result is smaller than {@code parallelism} and
     * therefore always fits into an {@code int}.
     *
     * <p>This method performs no argument validation; a {@code maxParallelism} of zero results in
     * an {@link ArithmeticException} from the division.
     *
     * @param maxParallelism Maximal parallelism that the job was initially created with. {@code 0
     *     < parallelism <= maxParallelism} must hold.
     * @param parallelism The current parallelism under which the job runs. Must be {@code <=
     *     maxParallelism}.
     * @param keyGroupId Id of a key-group. {@code 0 <= keyGroupId < maxParallelism}.
     * @return The index of the operator to which elements from the given key-group should be routed
     *     under the given parallelism and maxParallelism.
     */
    public static int computeOperatorIndexForKeyGroup(
            int maxParallelism, int parallelism, int keyGroupId) {
        // Widen to long before multiplying to avoid silent int overflow of the product.
        return (int) ((long) keyGroupId * parallelism / maxParallelism);
    }

    /**
     * Computes a default maximum parallelism from the operator parallelism. This is used in case
     * the user has not explicitly configured a maximum parallelism to still allow a certain degree
     * of scale-up.
     *
     * <p>The value is 1.5 times the operator parallelism (using integer division for the half),
     * rounded up to a power of two, and then clamped to the interval {@code
     * [DEFAULT_LOWER_BOUND_MAX_PARALLELISM, UPPER_BOUND_MAX_PARALLELISM]}.
     *
     * @param operatorParallelism the operator parallelism as basis for computation. Must pass
     *     {@link #checkParallelismPreconditions(int)}.
     * @return the computed default maximum parallelism.
     */
    public static int computeDefaultMaxParallelism(int operatorParallelism) {

        checkParallelismPreconditions(operatorParallelism);

        return Math.min(
                Math.max(
                        MathUtils.roundUpToPowerOfTwo(
                                operatorParallelism + (operatorParallelism / 2)),
                        DEFAULT_LOWER_BOUND_MAX_PARALLELISM),
                UPPER_BOUND_MAX_PARALLELISM);
    }

    /**
     * Verifies that the given parallelism is strictly positive and does not exceed {@link
     * #UPPER_BOUND_MAX_PARALLELISM}.
     *
     * <p>The check is delegated to {@link Preconditions#checkArgument}, which is responsible for
     * signalling a violation.
     *
     * @param parallelism the parallelism (or maximum parallelism) value to validate.
     */
    public static void checkParallelismPreconditions(int parallelism) {
        Preconditions.checkArgument(
                parallelism > 0 && parallelism <= UPPER_BOUND_MAX_PARALLELISM,
                "Operator parallelism not within bounds: " + parallelism);
    }
}