org.apache.flink.kubernetes.KubernetesResourceManager Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.kubernetes;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.GlobalConfiguration;
import org.apache.flink.kubernetes.configuration.KubernetesConfigOptions;
import org.apache.flink.kubernetes.configuration.KubernetesResourceManagerConfiguration;
import org.apache.flink.kubernetes.kubeclient.FlinkKubeClient;
import org.apache.flink.kubernetes.kubeclient.factory.KubernetesTaskManagerFactory;
import org.apache.flink.kubernetes.kubeclient.parameters.KubernetesTaskManagerParameters;
import org.apache.flink.kubernetes.kubeclient.resources.KubernetesPod;
import org.apache.flink.kubernetes.utils.KubernetesUtils;
import org.apache.flink.runtime.clusterframework.ApplicationStatus;
import org.apache.flink.runtime.clusterframework.BootstrapTools;
import org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters;
import org.apache.flink.runtime.clusterframework.TaskExecutorProcessUtils;
import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
import org.apache.flink.runtime.entrypoint.ClusterInformation;
import org.apache.flink.runtime.heartbeat.HeartbeatServices;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.metrics.groups.ResourceManagerMetricGroup;
import org.apache.flink.runtime.resourcemanager.ActiveResourceManager;
import org.apache.flink.runtime.resourcemanager.JobLeaderIdService;
import org.apache.flink.runtime.resourcemanager.ResourceManager;
import org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException;
import org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.RpcService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
/**
* Kubernetes specific implementation of the {@link ResourceManager}.
*/
public class KubernetesResourceManager extends ActiveResourceManager
implements FlinkKubeClient.PodCallbackHandler {
private static final Logger LOG = LoggerFactory.getLogger(KubernetesResourceManager.class);
/** The taskmanager pod name pattern is {clusterId}-{taskmanager}-{attemptId}-{podIndex}. */
private static final String TASK_MANAGER_POD_FORMAT = "%s-taskmanager-%d-%d";
private final Map workerNodes = new HashMap<>();
private final double defaultCpus;
/** When ResourceManager failover, the max attempt should recover. */
private long currentMaxAttemptId = 0;
/** Current max pod index. When creating a new pod, it should increase one. */
private long currentMaxPodId = 0;
private final String clusterId;
private final FlinkKubeClient kubeClient;
private final ContaineredTaskManagerParameters taskManagerParameters;
/** The number of pods requested, but not yet granted. */
private int numPendingPodRequests = 0;
private final KubernetesResourceManagerConfiguration configuration;
public KubernetesResourceManager(
RpcService rpcService,
String resourceManagerEndpointId,
ResourceID resourceId,
Configuration flinkConfig,
HighAvailabilityServices highAvailabilityServices,
HeartbeatServices heartbeatServices,
SlotManager slotManager,
JobLeaderIdService jobLeaderIdService,
ClusterInformation clusterInformation,
FatalErrorHandler fatalErrorHandler,
ResourceManagerMetricGroup resourceManagerMetricGroup,
FlinkKubeClient kubeClient,
KubernetesResourceManagerConfiguration configuration) {
super(
flinkConfig,
System.getenv(),
rpcService,
resourceManagerEndpointId,
resourceId,
highAvailabilityServices,
heartbeatServices,
slotManager,
jobLeaderIdService,
clusterInformation,
fatalErrorHandler,
resourceManagerMetricGroup);
this.clusterId = flinkConfig.getString(KubernetesConfigOptions.CLUSTER_ID);
this.defaultCpus = taskExecutorProcessSpec.getCpuCores().getValue().doubleValue();
this.kubeClient = kubeClient;
this.taskManagerParameters =
ContaineredTaskManagerParameters.create(flinkConfig, taskExecutorProcessSpec, numSlotsPerTaskManager);
this.configuration = configuration;
}
@Override
protected Configuration loadClientConfiguration() {
return GlobalConfiguration.loadConfiguration();
}
@Override
protected void initialize() throws ResourceManagerException {
recoverWorkerNodesFromPreviousAttempts();
kubeClient.watchPodsAndDoCallback(KubernetesUtils.getTaskManagerLabels(clusterId, flinkConfig), this);
}
@Override
public CompletableFuture onStop() {
// shut down all components
Throwable exception = null;
try {
kubeClient.close();
} catch (Throwable t) {
exception = t;
}
return getStopTerminationFutureOrCompletedExceptionally(exception);
}
@Override
protected void internalDeregisterApplication(ApplicationStatus finalStatus, @Nullable String diagnostics) {
LOG.info(
"Stopping kubernetes cluster, clusterId: {}, diagnostics: {}",
clusterId,
diagnostics == null ? "" : diagnostics);
kubeClient.stopAndCleanupCluster(clusterId);
}
@Override
public Collection startNewWorker(ResourceProfile resourceProfile) {
LOG.info("Starting new worker with resource profile, {}", resourceProfile);
if (!resourceProfilesPerWorker.iterator().next().isMatching(resourceProfile)) {
return Collections.emptyList();
}
requestKubernetesPod();
return resourceProfilesPerWorker;
}
@Override
protected KubernetesWorkerNode workerStarted(ResourceID resourceID) {
return workerNodes.get(resourceID);
}
@Override
public boolean stopWorker(final KubernetesWorkerNode worker) {
LOG.info("Stopping Worker {}.", worker.getResourceID());
workerNodes.remove(worker.getResourceID());
try {
kubeClient.stopPod(worker.getResourceID().toString());
} catch (Exception e) {
kubeClient.handleException(e);
return false;
}
return true;
}
@Override
public void onAdded(List pods) {
runAsync(() -> {
for (KubernetesPod pod : pods) {
if (numPendingPodRequests > 0) {
numPendingPodRequests--;
final KubernetesWorkerNode worker = new KubernetesWorkerNode(new ResourceID(pod.getName()));
workerNodes.putIfAbsent(worker.getResourceID(), worker);
}
log.info("Received new TaskManager pod: {} - Remaining pending pod requests: {}",
pod.getName(), numPendingPodRequests);
}
});
}
@Override
public void onModified(List pods) {
runAsync(() -> pods.forEach(this::removePodIfTerminated));
}
@Override
public void onDeleted(List pods) {
runAsync(() -> pods.forEach(this::removePodIfTerminated));
}
@Override
public void onError(List pods) {
runAsync(() -> pods.forEach(this::removePodIfTerminated));
}
@VisibleForTesting
Map getWorkerNodes() {
return workerNodes;
}
private void recoverWorkerNodesFromPreviousAttempts() throws ResourceManagerException {
final List podList = kubeClient.getPodsWithLabels(KubernetesUtils.getTaskManagerLabels(clusterId, flinkConfig));
for (KubernetesPod pod : podList) {
final KubernetesWorkerNode worker = new KubernetesWorkerNode(new ResourceID(pod.getName()));
workerNodes.put(worker.getResourceID(), worker);
final long attempt = worker.getAttempt();
if (attempt > currentMaxAttemptId) {
currentMaxAttemptId = attempt;
}
}
log.info("Recovered {} pods from previous attempts, current attempt id is {}.",
workerNodes.size(),
++currentMaxAttemptId);
}
private void requestKubernetesPod() {
numPendingPodRequests++;
log.info("Requesting new TaskManager pod with <{},{}>. Number pending requests {}.",
defaultMemoryMB,
defaultCpus,
numPendingPodRequests);
final String podName = String.format(
TASK_MANAGER_POD_FORMAT,
clusterId,
currentMaxAttemptId,
++currentMaxPodId);
final String dynamicProperties =
BootstrapTools.getDynamicPropertiesAsString(flinkClientConfig, flinkConfig);
final KubernetesTaskManagerParameters kubernetesTaskManagerParameters = new KubernetesTaskManagerParameters(
flinkConfig,
podName,
defaultMemoryMB,
dynamicProperties,
taskManagerParameters);
final KubernetesPod taskManagerPod =
KubernetesTaskManagerFactory.createTaskManagerComponent(kubernetesTaskManagerParameters);
log.info("TaskManager {} will be started with {}.", podName, taskExecutorProcessSpec);
kubeClient.createTaskManagerPod(taskManagerPod)
.whenComplete(
(ignore, throwable) -> {
if (throwable != null) {
log.error("Could not start TaskManager in pod {}.", podName, throwable);
scheduleRunAsync(
this::decreasePendingAndRequestKubernetesPodIfRequired,
configuration.getPodCreationRetryInterval());
}
}
);
}
private void decreasePendingAndRequestKubernetesPodIfRequired() {
validateRunsInMainThread();
numPendingPodRequests--;
requestKubernetesPodIfRequired();
}
/**
* Request new pod if pending pods cannot satisfy pending slot requests.
*/
private void requestKubernetesPodIfRequired() {
final int requiredTaskManagerSlots = getNumberRequiredTaskManagerSlots();
final int pendingTaskManagerSlots = numPendingPodRequests * numSlotsPerTaskManager;
if (requiredTaskManagerSlots > pendingTaskManagerSlots) {
requestKubernetesPod();
}
}
private void removePodIfTerminated(KubernetesPod pod) {
if (pod.isTerminated()) {
kubeClient.stopPod(pod.getName());
final KubernetesWorkerNode kubernetesWorkerNode = workerNodes.remove(new ResourceID(pod.getName()));
if (kubernetesWorkerNode != null) {
requestKubernetesPodIfRequired();
}
}
}
@Override
protected double getCpuCores(Configuration configuration) {
return TaskExecutorProcessUtils.getCpuCoresWithFallbackConfigOption(configuration, KubernetesConfigOptions.TASK_MANAGER_CPU);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy