// org.apache.flink.runtime.jobmaster.JobMaster
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmaster;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.CheckpointingOptions;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.core.io.InputSplitAssigner;
import org.apache.flink.queryablestate.KvStateID;
import org.apache.flink.runtime.JobException;
import org.apache.flink.runtime.StoppingException;
import org.apache.flink.runtime.accumulators.AccumulatorSnapshot;
import org.apache.flink.runtime.blob.BlobServer;
import org.apache.flink.runtime.checkpoint.CheckpointCoordinator;
import org.apache.flink.runtime.checkpoint.CheckpointDeclineReason;
import org.apache.flink.runtime.checkpoint.CheckpointMetrics;
import org.apache.flink.runtime.checkpoint.CheckpointTriggerException;
import org.apache.flink.runtime.checkpoint.Checkpoints;
import org.apache.flink.runtime.checkpoint.CompletedCheckpoint;
import org.apache.flink.runtime.checkpoint.TaskStateSnapshot;
import org.apache.flink.runtime.client.JobExecutionException;
import org.apache.flink.runtime.clusterframework.types.AllocationID;
import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.runtime.concurrent.FutureUtils;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.execution.SuppressRestartsException;
import org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph;
import org.apache.flink.runtime.executiongraph.Execution;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.executiongraph.ExecutionGraph;
import org.apache.flink.runtime.executiongraph.ExecutionGraphBuilder;
import org.apache.flink.runtime.executiongraph.ExecutionGraphException;
import org.apache.flink.runtime.executiongraph.ExecutionJobVertex;
import org.apache.flink.runtime.executiongraph.IntermediateResult;
import org.apache.flink.runtime.executiongraph.JobStatusListener;
import org.apache.flink.runtime.executiongraph.restart.RestartStrategy;
import org.apache.flink.runtime.executiongraph.restart.RestartStrategyResolving;
import org.apache.flink.runtime.heartbeat.HeartbeatListener;
import org.apache.flink.runtime.heartbeat.HeartbeatManager;
import org.apache.flink.runtime.heartbeat.HeartbeatServices;
import org.apache.flink.runtime.heartbeat.HeartbeatTarget;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;
import org.apache.flink.runtime.jobmanager.OnCompletionActions;
import org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException;
import org.apache.flink.runtime.jobmaster.exceptions.JobModificationException;
import org.apache.flink.runtime.jobmaster.factories.JobManagerJobMetricGroupFactory;
import org.apache.flink.runtime.jobmaster.message.ClassloadingProps;
import org.apache.flink.runtime.jobmaster.slotpool.SlotPool;
import org.apache.flink.runtime.jobmaster.slotpool.SlotPoolFactory;
import org.apache.flink.runtime.jobmaster.slotpool.SlotPoolGateway;
import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener;
import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
import org.apache.flink.runtime.messages.Acknowledge;
import org.apache.flink.runtime.messages.FlinkJobNotFoundException;
import org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint;
import org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint;
import org.apache.flink.runtime.messages.webmonitor.JobDetails;
import org.apache.flink.runtime.metrics.groups.JobManagerJobMetricGroup;
import org.apache.flink.runtime.query.KvStateLocation;
import org.apache.flink.runtime.query.KvStateLocationRegistry;
import org.apache.flink.runtime.query.UnknownKvStateLocation;
import org.apache.flink.runtime.registration.RegisteredRpcConnection;
import org.apache.flink.runtime.registration.RegistrationResponse;
import org.apache.flink.runtime.registration.RetryingRegistration;
import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway;
import org.apache.flink.runtime.resourcemanager.ResourceManagerId;
import org.apache.flink.runtime.rest.handler.legacy.backpressure.BackPressureStatsTracker;
import org.apache.flink.runtime.rest.handler.legacy.backpressure.OperatorBackPressureStats;
import org.apache.flink.runtime.rest.handler.legacy.backpressure.OperatorBackPressureStatsResponse;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.FencedRpcEndpoint;
import org.apache.flink.runtime.rpc.RpcService;
import org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils;
import org.apache.flink.runtime.state.KeyGroupRange;
import org.apache.flink.runtime.taskexecutor.AccumulatorReport;
import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
import org.apache.flink.runtime.taskexecutor.slot.SlotOffer;
import org.apache.flink.runtime.taskmanager.TaskExecutionState;
import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
import org.apache.flink.runtime.webmonitor.WebMonitorUtils;
import org.apache.flink.types.SerializableOptional;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkException;
import org.apache.flink.util.InstantiationUtil;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import javax.annotation.Nullable;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeoutException;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;
/**
 * JobMaster implementation. The job master is responsible for the execution of a single
 * {@link JobGraph}.
 *
 * <p>It offers the following methods as part of its rpc interface to interact with the JobMaster
 * remotely:
 *
 * <ul>
 *     <li>{@link #updateTaskExecutionState} updates the task execution state for a given task</li>
 * </ul>
 */
public class JobMaster extends FencedRpcEndpoint implements JobMasterGateway {
/** Default names for Flink's distributed components. */
public static final String JOB_MANAGER_NAME = "jobmanager";
public static final String ARCHIVE_NAME = "archive";
// ------------------------------------------------------------------------
private final JobMasterConfiguration jobMasterConfiguration;
private final ResourceID resourceId;
private final JobGraph jobGraph;
private final Time rpcTimeout;
private final HighAvailabilityServices highAvailabilityServices;
private final BlobServer blobServer;
private final JobManagerJobMetricGroupFactory jobMetricGroupFactory;
private final HeartbeatManager taskManagerHeartbeatManager;
private final HeartbeatManager resourceManagerHeartbeatManager;
private final ScheduledExecutorService scheduledExecutorService;
private final OnCompletionActions jobCompletionActions;
private final FatalErrorHandler fatalErrorHandler;
private final ClassLoader userCodeLoader;
private final SlotPool slotPool;
private final SlotPoolGateway slotPoolGateway;
private final RestartStrategy restartStrategy;
// --------- BackPressure --------
private final BackPressureStatsTracker backPressureStatsTracker;
// --------- ResourceManager --------
private final LeaderRetrievalService resourceManagerLeaderRetriever;
// --------- TaskManagers --------
private final Map> registeredTaskManagers;
// -------- Mutable fields ---------
private ExecutionGraph executionGraph;
@Nullable
private JobManagerJobStatusListener jobStatusListener;
@Nullable
private JobManagerJobMetricGroup jobManagerJobMetricGroup;
@Nullable
private String lastInternalSavepoint;
@Nullable
private ResourceManagerAddress resourceManagerAddress;
@Nullable
private ResourceManagerConnection resourceManagerConnection;
@Nullable
private EstablishedResourceManagerConnection establishedResourceManagerConnection;
// ------------------------------------------------------------------------
/**
 * Creates a new JobMaster for the given {@code jobGraph}. Builds the ExecutionGraph eagerly
 * (and restores it from a savepoint/checkpoint if configured) but does not start execution;
 * execution begins with {@link #start(JobMasterId, Time)}.
 *
 * @throws Exception if the execution config cannot be deserialized or the ExecutionGraph
 *         cannot be created/restored
 */
public JobMaster(
RpcService rpcService,
JobMasterConfiguration jobMasterConfiguration,
ResourceID resourceId,
JobGraph jobGraph,
HighAvailabilityServices highAvailabilityService,
SlotPoolFactory slotPoolFactory,
JobManagerSharedServices jobManagerSharedServices,
HeartbeatServices heartbeatServices,
BlobServer blobServer,
JobManagerJobMetricGroupFactory jobMetricGroupFactory,
OnCompletionActions jobCompletionActions,
FatalErrorHandler fatalErrorHandler,
ClassLoader userCodeLoader) throws Exception {
super(rpcService, AkkaRpcServiceUtils.createRandomName(JOB_MANAGER_NAME));
// self gateway is handed to the heartbeat listener so timeouts are processed in the main thread
final JobMasterGateway selfGateway = getSelfGateway(JobMasterGateway.class);
this.jobMasterConfiguration = checkNotNull(jobMasterConfiguration);
this.resourceId = checkNotNull(resourceId);
this.jobGraph = checkNotNull(jobGraph);
this.rpcTimeout = jobMasterConfiguration.getRpcTimeout();
this.highAvailabilityServices = checkNotNull(highAvailabilityService);
this.blobServer = checkNotNull(blobServer);
this.scheduledExecutorService = jobManagerSharedServices.getScheduledExecutorService();
this.jobCompletionActions = checkNotNull(jobCompletionActions);
this.fatalErrorHandler = checkNotNull(fatalErrorHandler);
this.userCodeLoader = checkNotNull(userCodeLoader);
this.jobMetricGroupFactory = checkNotNull(jobMetricGroupFactory);
// sender: the JobMaster actively requests heartbeats from TaskExecutors
this.taskManagerHeartbeatManager = heartbeatServices.createHeartbeatManagerSender(
resourceId,
new TaskManagerHeartbeatListener(selfGateway),
rpcService.getScheduledExecutor(),
log);
// receiver: the ResourceManager drives the heartbeat towards the JobMaster
this.resourceManagerHeartbeatManager = heartbeatServices.createHeartbeatManager(
resourceId,
new ResourceManagerHeartbeatListener(),
rpcService.getScheduledExecutor(),
log);
final String jobName = jobGraph.getName();
final JobID jid = jobGraph.getJobID();
log.info("Initializing job {} ({}).", jobName, jid);
// the restart strategy configured in the user program wins over cluster defaults
final RestartStrategies.RestartStrategyConfiguration restartStrategyConfiguration =
jobGraph.getSerializedExecutionConfig()
.deserializeValue(userCodeLoader)
.getRestartStrategy();
this.restartStrategy = RestartStrategyResolving.resolve(restartStrategyConfiguration,
jobManagerSharedServices.getRestartStrategyFactory(),
jobGraph.isCheckpointingEnabled());
log.info("Using restart strategy {} for {} ({}).", this.restartStrategy, jobName, jid);
resourceManagerLeaderRetriever = highAvailabilityServices.getResourceManagerLeaderRetriever();
this.slotPool = checkNotNull(slotPoolFactory).createSlotPool(jobGraph.getJobID());
this.slotPoolGateway = slotPool.getSelfGateway(SlotPoolGateway.class);
this.registeredTaskManagers = new HashMap<>(4);
this.backPressureStatsTracker = checkNotNull(jobManagerSharedServices.getBackPressureStatsTracker());
this.lastInternalSavepoint = null;
this.jobManagerJobMetricGroup = jobMetricGroupFactory.create(jobGraph);
// builds AND (if configured) restores the graph from the latest checkpoint/savepoint
this.executionGraph = createAndRestoreExecutionGraph(jobManagerJobMetricGroup);
this.jobStatusListener = null;
this.resourceManagerConnection = null;
this.establishedResourceManagerConnection = null;
}
//----------------------------------------------------------------------------------------------
// Lifecycle management
//----------------------------------------------------------------------------------------------
// The un-fenced start() inherited from the RPC endpoint is deliberately unsupported:
// a JobMaster must always be started with a fencing token via start(JobMasterId, Time).
@Override
public void start() {
throw new UnsupportedOperationException("Should never call start() without leader ID");
}
/**
 * Start the rpc service and begin to run the job.
 *
 * @param newJobMasterId The necessary fencing token to run the job
 * @param timeout for the operation
 * @return Future acknowledge if the job could be started. Otherwise the future contains an exception
 */
public CompletableFuture<Acknowledge> start(final JobMasterId newJobMasterId, final Time timeout) throws Exception {
    // make sure we receive RPC and async calls
    super.start();

    // startJobExecution installs the new fencing token itself, so it must run un-fenced
    return callAsyncWithoutFencing(() -> startJobExecution(newJobMasterId), timeout);
}
/**
 * Suspending job, all the running tasks will be cancelled, and communication with other components
 * will be disposed.
 *
 * <p>Mostly a job is suspended because the leadership has been revoked; one can restart this job by
 * calling the {@link #start(JobMasterId, Time)} method once we take the leadership back again.
 *
 * <p>This method is executed asynchronously.
 *
 * @param cause The reason of why this job been suspended.
 * @param timeout for this operation
 * @return Future acknowledge indicating that the job has been suspended. Otherwise the future contains an exception
 */
public CompletableFuture<Acknowledge> suspend(final Exception cause, final Time timeout) {
    // the fencing token may already be revoked at this point, hence un-fenced
    CompletableFuture<Acknowledge> suspendFuture = callAsyncWithoutFencing(() -> suspendExecution(cause), timeout);

    // stops the RPC endpoint itself (distinct from the stop(Time) job-stop RPC)
    stop();

    return suspendFuture;
}
/**
 * Suspend the job and shutdown all other services including rpc.
 *
 * @return future that completes once the slot pool has terminated and the last internal
 *         savepoint (if any) has been disposed
 */
@Override
public CompletableFuture<Void> postStop() {
    log.info("Stopping the JobMaster for job {}({}).", jobGraph.getName(), jobGraph.getJobID());

    // disconnect from all registered TaskExecutors; iterate over a copy because
    // disconnectTaskManager removes entries from registeredTaskManagers
    final Set<ResourceID> taskManagerResourceIds = new HashSet<>(registeredTaskManagers.keySet());
    final FlinkException cause = new FlinkException("Stopping JobMaster for job " + jobGraph.getName() +
        '(' + jobGraph.getJobID() + ").");

    for (ResourceID taskManagerResourceId : taskManagerResourceIds) {
        disconnectTaskManager(taskManagerResourceId, cause);
    }

    taskManagerHeartbeatManager.stop();
    resourceManagerHeartbeatManager.stop();

    // make sure there is a graceful exit
    suspendExecution(new FlinkException("JobManager is shutting down."));

    // shut down will internally release all registered slots
    slotPool.shutDown();

    final CompletableFuture<Void> disposeInternalSavepointFuture;

    if (lastInternalSavepoint != null) {
        disposeInternalSavepointFuture = CompletableFuture.runAsync(() -> disposeSavepoint(lastInternalSavepoint));
    } else {
        disposeInternalSavepointFuture = CompletableFuture.completedFuture(null);
    }

    final CompletableFuture<Void> slotPoolTerminationFuture = slotPool.getTerminationFuture();

    return FutureUtils.completeAll(Arrays.asList(disposeInternalSavepointFuture, slotPoolTerminationFuture));
}
//----------------------------------------------------------------------------------------------
// RPC methods
//----------------------------------------------------------------------------------------------
/**
 * Cancels the currently executed job.
 *
 * @param timeout of this operation (unused; cancellation is triggered synchronously)
 * @return acknowledge that the cancellation was triggered
 */
@Override
public CompletableFuture<Acknowledge> cancel(Time timeout) {
    executionGraph.cancel();

    return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
 * Stops the job: sources stop emitting and the job finishes gracefully. Only jobs whose
 * sources are stoppable can be stopped.
 *
 * @param timeout of this operation (unused; stopping is triggered synchronously)
 * @return acknowledge if stopping was triggered, exceptional future with
 *         {@link StoppingException} if the job is not stoppable
 */
@Override
public CompletableFuture<Acknowledge> stop(Time timeout) {
    try {
        executionGraph.stop();
    } catch (StoppingException e) {
        return FutureUtils.completedExceptionally(e);
    }

    return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
 * Rescales the whole job to the given parallelism by rescaling every operator.
 *
 * @param newParallelism target parallelism, must be &gt; 0
 * @param rescalingBehaviour how strictly the new parallelism is applied per operator
 * @param timeout of this operation
 * @return acknowledge once the job has been rescaled
 */
@Override
public CompletableFuture<Acknowledge> rescaleJob(
        int newParallelism,
        RescalingBehaviour rescalingBehaviour,
        Time timeout) {
    final ArrayList<JobVertexID> allOperators = new ArrayList<>(jobGraph.getNumberOfVertices());

    for (JobVertex jobVertex : jobGraph.getVertices()) {
        allOperators.add(jobVertex.getID());
    }

    return rescaleOperators(allOperators, newParallelism, rescalingBehaviour, timeout);
}
/**
 * Rescales the given operators to {@code newParallelism} by taking a savepoint, building a
 * new ExecutionGraph with the adjusted parallelism, suspending the old graph and resuming
 * the new one from the savepoint. On any failure the new graph is failed and the old
 * checkpoint scheduler is restarted.
 *
 * @param operators the job vertices to rescale
 * @param newParallelism target parallelism, must be &gt; 0
 * @param rescalingBehaviour how strictly the new parallelism is applied per operator
 * @param timeout for the savepoint operation
 * @return acknowledge once the rescaled graph has been scheduled
 */
@Override
public CompletableFuture<Acknowledge> rescaleOperators(
        Collection<JobVertexID> operators,
        int newParallelism,
        RescalingBehaviour rescalingBehaviour,
        Time timeout) {
    if (newParallelism <= 0) {
        return FutureUtils.completedExceptionally(
            new JobModificationException("The target parallelism of a rescaling operation must be larger than 0."));
    }

    // 1. Check whether we can rescale the job & rescale the respective vertices
    try {
        rescaleJobGraph(operators, newParallelism, rescalingBehaviour);
    } catch (FlinkException e) {
        final String msg = String.format("Cannot rescale job %s.", jobGraph.getName());

        log.info(msg, e);
        return FutureUtils.completedExceptionally(new JobModificationException(msg, e));
    }

    final ExecutionGraph currentExecutionGraph = executionGraph;

    final JobManagerJobMetricGroup newJobManagerJobMetricGroup = jobMetricGroupFactory.create(jobGraph);
    final ExecutionGraph newExecutionGraph;

    try {
        newExecutionGraph = createExecutionGraph(newJobManagerJobMetricGroup);
    } catch (JobExecutionException | JobException e) {
        return FutureUtils.completedExceptionally(
            new JobModificationException("Could not create rescaled ExecutionGraph.", e));
    }

    // 3. disable checkpoint coordinator to suppress subsequent checkpoints
    // NOTE(review): assumes checkpointing is enabled for this job (coordinator non-null) — confirm
    final CheckpointCoordinator checkpointCoordinator = currentExecutionGraph.getCheckpointCoordinator();
    checkpointCoordinator.stopCheckpointScheduler();

    // 4. take a savepoint
    final CompletableFuture<String> savepointFuture = getJobModificationSavepoint(timeout);

    final CompletableFuture<ExecutionGraph> executionGraphFuture = restoreExecutionGraphFromRescalingSavepoint(
        newExecutionGraph,
        savepointFuture)
        .handleAsync(
            (ExecutionGraph executionGraph, Throwable failure) -> {
                if (failure != null) {
                    // in case that we couldn't take a savepoint or restore from it, let's restart the checkpoint
                    // coordinator and abort the rescaling operation
                    if (checkpointCoordinator.isPeriodicCheckpointingConfigured()) {
                        checkpointCoordinator.startCheckpointScheduler();
                    }

                    throw new CompletionException(ExceptionUtils.stripCompletionException(failure));
                } else {
                    return executionGraph;
                }
            },
            getMainThreadExecutor());

    // 5. suspend the current job
    final CompletableFuture<JobStatus> terminationFuture = executionGraphFuture.thenComposeAsync(
        (ExecutionGraph ignored) -> {
            suspendExecutionGraph(new FlinkException("Job is being rescaled."));
            return currentExecutionGraph.getTerminationFuture();
        },
        getMainThreadExecutor());

    final CompletableFuture<Void> suspendedFuture = terminationFuture.thenAccept(
        (JobStatus jobStatus) -> {
            if (jobStatus != JobStatus.SUSPENDED) {
                final String msg = String.format("Job %s rescaling failed because we could not suspend the execution graph.", jobGraph.getName());
                log.info(msg);
                throw new CompletionException(new JobModificationException(msg));
            }
        });

    // 6. resume the new execution graph from the taken savepoint
    final CompletableFuture<Acknowledge> rescalingFuture = suspendedFuture.thenCombineAsync(
        executionGraphFuture,
        (Void ignored, ExecutionGraph restoredExecutionGraph) -> {
            // check if the ExecutionGraph is still the same
            if (executionGraph == currentExecutionGraph) {
                clearExecutionGraphFields();
                assignExecutionGraph(restoredExecutionGraph, newJobManagerJobMetricGroup);
                scheduleExecutionGraph();

                return Acknowledge.get();
            } else {
                throw new CompletionException(new JobModificationException("Detected concurrent modification of ExecutionGraph. Aborting the rescaling."));
            }
        },
        getMainThreadExecutor());

    rescalingFuture.whenComplete(
        (Acknowledge ignored, Throwable throwable) -> {
            if (throwable != null) {
                // fail the newly created execution graph
                newExecutionGraph.failGlobal(
                    new SuppressRestartsException(
                        new FlinkException(
                            String.format("Failed to rescale the job %s.", jobGraph.getJobID()),
                            throwable)));
            }
        });

    return rescalingFuture;
}
/**
 * Updates the task execution state for a given task.
 *
 * @param taskExecutionState New task execution state for a given task
 * @return Acknowledge the task execution state update; exceptional future with
 *         {@link ExecutionGraphException} if the execution attempt is unknown
 */
@Override
public CompletableFuture<Acknowledge> updateTaskExecutionState(
        final TaskExecutionState taskExecutionState) {
    checkNotNull(taskExecutionState, "taskExecutionState");

    if (executionGraph.updateState(taskExecutionState)) {
        return CompletableFuture.completedFuture(Acknowledge.get());
    } else {
        return FutureUtils.completedExceptionally(
            new ExecutionGraphException("The execution attempt " +
                taskExecutionState.getID() + " was not found."));
    }
}
/**
 * Hands the next input split for the given vertex/attempt to the requesting TaskExecutor.
 * The split is chosen by the vertex's {@link InputSplitAssigner}, preferring locality with
 * the host the execution is deployed on.
 *
 * @param vertexID the vertex requesting a split
 * @param executionAttempt the requesting execution attempt
 * @return the serialized next input split (possibly a serialized {@code null} when the
 *         assigner is exhausted); exceptional future if the attempt/vertex/assigner is unknown
 */
@Override
public CompletableFuture<SerializedInputSplit> requestNextInputSplit(
        final JobVertexID vertexID,
        final ExecutionAttemptID executionAttempt) {

    final Execution execution = executionGraph.getRegisteredExecutions().get(executionAttempt);
    if (execution == null) {
        // can happen when JobManager had already unregistered this execution upon on task failure,
        // but TaskManager get some delay to aware of that situation
        if (log.isDebugEnabled()) {
            log.debug("Can not find Execution for attempt {}.", executionAttempt);
        }
        // but we should TaskManager be aware of this
        return FutureUtils.completedExceptionally(new Exception("Can not find Execution for attempt " + executionAttempt));
    }

    final ExecutionJobVertex vertex = executionGraph.getJobVertex(vertexID);
    if (vertex == null) {
        log.error("Cannot find execution vertex for vertex ID {}.", vertexID);
        return FutureUtils.completedExceptionally(new Exception("Cannot find execution vertex for vertex ID " + vertexID));
    }

    final InputSplitAssigner splitAssigner = vertex.getSplitAssigner();
    if (splitAssigner == null) {
        log.error("No InputSplitAssigner for vertex ID {}.", vertexID);
        return FutureUtils.completedExceptionally(new Exception("No InputSplitAssigner for vertex ID " + vertexID));
    }

    final LogicalSlot slot = execution.getAssignedResource();
    final int taskId = execution.getVertex().getParallelSubtaskIndex();
    // host may be null when the execution is not deployed; the assigner then picks any split
    final String host = slot != null ? slot.getTaskManagerLocation().getHostname() : null;
    final InputSplit nextInputSplit = splitAssigner.getNextInputSplit(host, taskId);

    if (log.isDebugEnabled()) {
        log.debug("Send next input split {}.", nextInputSplit);
    }

    try {
        final byte[] serializedInputSplit = InstantiationUtil.serializeObject(nextInputSplit);
        return CompletableFuture.completedFuture(new SerializedInputSplit(serializedInputSplit));
    } catch (Exception ex) {
        log.error("Could not serialize the next input split of class {}.", nextInputSplit.getClass(), ex);
        IOException reason = new IOException("Could not serialize the next input split of class " +
            nextInputSplit.getClass() + ".", ex);
        // a split we cannot ship is fatal for the vertex
        vertex.fail(reason);
        return FutureUtils.completedExceptionally(reason);
    }
}
/**
 * Returns the execution state of the producer of the given result partition.
 *
 * @param intermediateResultId the intermediate data set the partition belongs to
 * @param resultPartitionId identifies the partition (and its producing attempt)
 * @return the producer's execution state; exceptional future with
 *         {@link PartitionProducerDisposedException} if the producing attempt has been
 *         superseded, or {@link IllegalArgumentException} if the data set is unknown
 */
@Override
public CompletableFuture<ExecutionState> requestPartitionState(
        final IntermediateDataSetID intermediateResultId,
        final ResultPartitionID resultPartitionId) {

    // fast path: the producing attempt is still registered
    final Execution execution = executionGraph.getRegisteredExecutions().get(resultPartitionId.getProducerId());
    if (execution != null) {
        return CompletableFuture.completedFuture(execution.getState());
    }
    else {
        final IntermediateResult intermediateResult =
            executionGraph.getAllIntermediateResults().get(intermediateResultId);

        if (intermediateResult != null) {
            // Try to find the producing execution
            Execution producerExecution = intermediateResult
                .getPartitionById(resultPartitionId.getPartitionId())
                .getProducer()
                .getCurrentExecutionAttempt();

            if (producerExecution.getAttemptId().equals(resultPartitionId.getProducerId())) {
                return CompletableFuture.completedFuture(producerExecution.getState());
            } else {
                // the attempt that produced the partition is gone (e.g. after a restart)
                return FutureUtils.completedExceptionally(new PartitionProducerDisposedException(resultPartitionId));
            }
        } else {
            return FutureUtils.completedExceptionally(new IllegalArgumentException("Intermediate data set with ID "
                + intermediateResultId + " not found."));
        }
    }
}
/**
 * Notifies that data of the given result partition is available so its consumers can be
 * scheduled or updated.
 *
 * @param partitionID the partition whose consumers should be scheduled/updated
 * @param timeout of this operation (unused; the update is applied synchronously)
 * @return acknowledge on success, exceptional future otherwise
 */
@Override
public CompletableFuture<Acknowledge> scheduleOrUpdateConsumers(
        final ResultPartitionID partitionID,
        final Time timeout) {
    try {
        executionGraph.scheduleOrUpdateConsumers(partitionID);
        return CompletableFuture.completedFuture(Acknowledge.get());
    } catch (Exception e) {
        return FutureUtils.completedExceptionally(e);
    }
}
/**
 * Disconnects the given TaskExecutor: stops heartbeating it, releases its slots in the
 * slot pool and tells the TaskExecutor to disconnect from this job.
 *
 * @param resourceID resource id of the TaskExecutor to disconnect
 * @param cause why the TaskExecutor is disconnected
 * @return future acknowledge of the released TaskExecutor slots
 */
@Override
public CompletableFuture<Acknowledge> disconnectTaskManager(final ResourceID resourceID, final Exception cause) {
    log.debug("Disconnect TaskExecutor {} because: {}", resourceID, cause.getMessage());

    taskManagerHeartbeatManager.unmonitorTarget(resourceID);
    CompletableFuture<Acknowledge> releaseFuture = slotPoolGateway.releaseTaskManager(resourceID, cause);

    Tuple2<TaskManagerLocation, TaskExecutorGateway> taskManagerConnection = registeredTaskManagers.remove(resourceID);

    if (taskManagerConnection != null) {
        taskManagerConnection.f1.disconnectJobManager(jobGraph.getJobID(), cause);
    }

    return releaseFuture;
}
// TODO: This method needs a leader session ID
/**
 * Forwards a checkpoint acknowledgement from a task to the CheckpointCoordinator. The
 * heavy-weight processing is off-loaded to the RPC service's executor so the main thread
 * stays responsive.
 */
@Override
public void acknowledgeCheckpoint(
final JobID jobID,
final ExecutionAttemptID executionAttemptID,
final long checkpointId,
final CheckpointMetrics checkpointMetrics,
final TaskStateSnapshot checkpointState) {
final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
final AcknowledgeCheckpoint ackMessage = new AcknowledgeCheckpoint(
jobID,
executionAttemptID,
checkpointId,
checkpointMetrics,
checkpointState);
if (checkpointCoordinator != null) {
getRpcService().execute(() -> {
try {
checkpointCoordinator.receiveAcknowledgeMessage(ackMessage);
} catch (Throwable t) {
log.warn("Error while processing checkpoint acknowledgement message", t);
}
});
} else {
// no coordinator: an error while RUNNING (checkpointing expected), harmless otherwise
String errorMessage = "Received AcknowledgeCheckpoint message for job {} with no CheckpointCoordinator";
if (executionGraph.getState() == JobStatus.RUNNING) {
log.error(errorMessage, jobGraph.getJobID());
} else {
log.debug(errorMessage, jobGraph.getJobID());
}
}
}
// TODO: This method needs a leader session ID
/**
 * Forwards a checkpoint decline message from a task to the CheckpointCoordinator,
 * processing it on the RPC service's executor rather than the main thread.
 */
@Override
public void declineCheckpoint(DeclineCheckpoint decline) {
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    // no coordinator: log and bail out — an error while RUNNING, debug noise otherwise
    if (coordinator == null) {
        String errorMessage = "Received DeclineCheckpoint message for job {} with no CheckpointCoordinator";
        if (executionGraph.getState() == JobStatus.RUNNING) {
            log.error(errorMessage, jobGraph.getJobID());
        } else {
            log.debug(errorMessage, jobGraph.getJobID());
        }
        return;
    }

    getRpcService().execute(() -> {
        try {
            coordinator.receiveDeclineMessage(decline);
        } catch (Exception e) {
            log.error("Error in CheckpointCoordinator while processing {}", decline, e);
        }
    });
}
/**
 * Looks up the location of queryable (key-value) state registered under the given name.
 *
 * @param jobId must match this JobMaster's job
 * @param registrationName name the state was registered under
 * @return the state's location; exceptional future with {@link UnknownKvStateLocation} if
 *         no state is registered under that name, or {@link FlinkJobNotFoundException} for
 *         a foreign job id
 */
@Override
public CompletableFuture<KvStateLocation> requestKvStateLocation(final JobID jobId, final String registrationName) {
    // sanity check for the correct JobID
    if (jobGraph.getJobID().equals(jobId)) {
        if (log.isDebugEnabled()) {
            log.debug("Lookup key-value state for job {} with registration " +
                "name {}.", jobGraph.getJobID(), registrationName);
        }

        final KvStateLocationRegistry registry = executionGraph.getKvStateLocationRegistry();
        final KvStateLocation location = registry.getKvStateLocation(registrationName);
        if (location != null) {
            return CompletableFuture.completedFuture(location);
        } else {
            return FutureUtils.completedExceptionally(new UnknownKvStateLocation(registrationName));
        }
    } else {
        if (log.isDebugEnabled()) {
            log.debug("Request of key-value state location for unknown job {} received.", jobId);
        }
        return FutureUtils.completedExceptionally(new FlinkJobNotFoundException(jobId));
    }
}
/**
 * Registers queryable (key-value) state for the given vertex/key-group range with the
 * KvState location registry.
 *
 * @param jobId must match this JobMaster's job
 * @return acknowledge on success; exceptional future on registration failure or
 *         {@link FlinkJobNotFoundException} for a foreign job id
 */
@Override
public CompletableFuture<Acknowledge> notifyKvStateRegistered(
        final JobID jobId,
        final JobVertexID jobVertexId,
        final KeyGroupRange keyGroupRange,
        final String registrationName,
        final KvStateID kvStateId,
        final InetSocketAddress kvStateServerAddress) {
    if (jobGraph.getJobID().equals(jobId)) {
        if (log.isDebugEnabled()) {
            log.debug("Key value state registered for job {} under name {}.",
                jobGraph.getJobID(), registrationName);
        }

        try {
            executionGraph.getKvStateLocationRegistry().notifyKvStateRegistered(
                jobVertexId, keyGroupRange, registrationName, kvStateId, kvStateServerAddress);

            return CompletableFuture.completedFuture(Acknowledge.get());
        } catch (Exception e) {
            // include the exception in the log (was silently dropped before, cf. the
            // unregister path which logs it)
            log.error("Failed to notify KvStateRegistry about registration {}.", registrationName, e);
            return FutureUtils.completedExceptionally(e);
        }
    } else {
        if (log.isDebugEnabled()) {
            log.debug("Notification about key-value state registration for unknown job {} received.", jobId);
        }
        return FutureUtils.completedExceptionally(new FlinkJobNotFoundException(jobId));
    }
}
/**
 * Unregisters previously registered queryable (key-value) state from the KvState location
 * registry.
 *
 * @param jobId must match this JobMaster's job
 * @return acknowledge on success; exceptional future on failure or
 *         {@link FlinkJobNotFoundException} for a foreign job id
 */
@Override
public CompletableFuture<Acknowledge> notifyKvStateUnregistered(
        JobID jobId,
        JobVertexID jobVertexId,
        KeyGroupRange keyGroupRange,
        String registrationName) {
    if (jobGraph.getJobID().equals(jobId)) {
        if (log.isDebugEnabled()) {
            log.debug("Key value state unregistered for job {} under name {}.",
                jobGraph.getJobID(), registrationName);
        }

        try {
            executionGraph.getKvStateLocationRegistry().notifyKvStateUnregistered(
                jobVertexId, keyGroupRange, registrationName);

            return CompletableFuture.completedFuture(Acknowledge.get());
        } catch (Exception e) {
            log.error("Failed to notify KvStateRegistry about registration {}.", registrationName, e);
            return FutureUtils.completedExceptionally(e);
        }
    } else {
        if (log.isDebugEnabled()) {
            log.debug("Notification about key-value state deregistration for unknown job {} received.", jobId);
        }

        return FutureUtils.completedExceptionally(new FlinkJobNotFoundException(jobId));
    }
}
/**
 * Returns the information a TaskExecutor needs to build the user-code classloader for this
 * job: the blob server port plus the job's required jar files and classpaths.
 */
@Override
public CompletableFuture<ClassloadingProps> requestClassloadingProps() {
    return CompletableFuture.completedFuture(
        new ClassloadingProps(blobServer.getPort(),
            executionGraph.getRequiredJarFiles(),
            executionGraph.getRequiredClasspaths()));
}
@Override
public CompletableFuture> offerSlots(
final ResourceID taskManagerId,
final Collection slots,
final Time timeout) {
Tuple2 taskManager = registeredTaskManagers.get(taskManagerId);
if (taskManager == null) {
return FutureUtils.completedExceptionally(new Exception("Unknown TaskManager " + taskManagerId));
}
final TaskManagerLocation taskManagerLocation = taskManager.f0;
final TaskExecutorGateway taskExecutorGateway = taskManager.f1;
final RpcTaskManagerGateway rpcTaskManagerGateway = new RpcTaskManagerGateway(taskExecutorGateway, getFencingToken());
return slotPoolGateway.offerSlots(
taskManagerLocation,
rpcTaskManagerGateway,
slots);
}
/**
 * Fails the slot with the given allocation id, provided the reporting TaskExecutor is
 * actually registered with this JobMaster; otherwise only a warning is logged.
 */
@Override
public void failSlot(
        final ResourceID taskManagerId,
        final AllocationID allocationId,
        final Exception cause) {

    // ignore reports from TaskManagers we do not know about
    if (!registeredTaskManagers.containsKey(taskManagerId)) {
        log.warn("Cannot fail slot " + allocationId + " because the TaskManager " +
            taskManagerId + " is unknown.");
        return;
    }

    internalFailAllocation(allocationId, cause);
}
private void internalFailAllocation(AllocationID allocationId, Exception cause) {
final CompletableFuture> emptyTaskExecutorFuture = slotPoolGateway.failAllocation(allocationId, cause);
emptyTaskExecutorFuture.thenAcceptAsync(
resourceIdOptional -> resourceIdOptional.ifPresent(this::releaseEmptyTaskManager),
getMainThreadExecutor());
}
/**
 * Disconnects a TaskExecutor that no longer has any slots registered at this JobMaster.
 *
 * @param resourceId resource id of the now-empty TaskExecutor
 * @return future acknowledge of the disconnect
 */
private CompletableFuture<Acknowledge> releaseEmptyTaskManager(ResourceID resourceId) {
    return disconnectTaskManager(resourceId, new FlinkException(String.format("No more slots registered at JobMaster %s.", resourceId)));
}
/**
 * Registers a TaskExecutor at this JobMaster. Idempotent: a second registration of an
 * already-known TaskExecutor immediately succeeds. Otherwise a gateway connection is
 * established, the TaskExecutor is registered at the slot pool and added to heartbeat
 * monitoring.
 *
 * @param taskManagerRpcAddress rpc address to connect the gateway to
 * @param taskManagerLocation location (including resource id) of the TaskExecutor
 * @param timeout of this operation (unused; connection handling is asynchronous)
 * @return registration success, or a decline if the gateway connection fails
 */
@Override
public CompletableFuture<RegistrationResponse> registerTaskManager(
        final String taskManagerRpcAddress,
        final TaskManagerLocation taskManagerLocation,
        final Time timeout) {

    final ResourceID taskManagerId = taskManagerLocation.getResourceID();

    if (registeredTaskManagers.containsKey(taskManagerId)) {
        final RegistrationResponse response = new JMTMRegistrationSuccess(resourceId);
        return CompletableFuture.completedFuture(response);
    } else {
        return getRpcService()
            .connect(taskManagerRpcAddress, TaskExecutorGateway.class)
            .handleAsync(
                (TaskExecutorGateway taskExecutorGateway, Throwable throwable) -> {
                    if (throwable != null) {
                        return new RegistrationResponse.Decline(throwable.getMessage());
                    }

                    slotPoolGateway.registerTaskManager(taskManagerId);
                    registeredTaskManagers.put(taskManagerId, Tuple2.of(taskManagerLocation, taskExecutorGateway));

                    // monitor the task manager as heartbeat target
                    taskManagerHeartbeatManager.monitorTarget(taskManagerId, new HeartbeatTarget<Void>() {
                        @Override
                        public void receiveHeartbeat(ResourceID resourceID, Void payload) {
                            // the task manager will not request heartbeat, so this method will never be called currently
                        }

                        @Override
                        public void requestHeartbeat(ResourceID resourceID, Void payload) {
                            taskExecutorGateway.heartbeatFromJobManager(resourceID);
                        }
                    });

                    return new JMTMRegistrationSuccess(resourceId);
                },
                getMainThreadExecutor());
    }
}
/**
 * Handles a disconnect request from a ResourceManager. Only acted upon when the
 * given id matches the ResourceManager this JobMaster is currently connecting to;
 * stale requests from other (old) leaders are ignored.
 *
 * @param resourceManagerId id of the ResourceManager asking to disconnect
 * @param cause reason for the disconnect
 */
@Override
public void disconnectResourceManager(
final ResourceManagerId resourceManagerId,
final Exception cause) {
// Guard: drop requests that do not belong to the current ResourceManager leader.
if (!isConnectingToResourceManager(resourceManagerId)) {
return;
}
reconnectToResourceManager(cause);
}
// Returns true iff a ResourceManager address is known and its leader id matches
// the given one, i.e. the id refers to the ResourceManager we are talking to.
private boolean isConnectingToResourceManager(ResourceManagerId resourceManagerId) {
if (resourceManagerAddress == null) {
return false;
}
return resourceManagerAddress.getResourceManagerId().equals(resourceManagerId);
}
// Receives a heartbeat from a TaskManager, carrying its accumulator report,
// and forwards it to the heartbeat manager.
@Override
public void heartbeatFromTaskManager(final ResourceID resourceID, AccumulatorReport accumulatorReport) {
taskManagerHeartbeatManager.receiveHeartbeat(resourceID, accumulatorReport);
}
// Receives a heartbeat request from the ResourceManager; the JobMaster answers
// by requesting a heartbeat from its side (no payload).
@Override
public void heartbeatFromResourceManager(final ResourceID resourceID) {
resourceManagerHeartbeatManager.requestHeartbeat(resourceID, null);
}
// Builds job details from the current execution graph, off the RPC main thread.
// The graph reference is captured first so the async task uses a consistent snapshot
// even if the field is reassigned meanwhile.
@Override
public CompletableFuture requestJobDetails(Time timeout) {
final ExecutionGraph currentExecutionGraph = executionGraph;
return CompletableFuture.supplyAsync(() -> WebMonitorUtils.createDetailsForJob(currentExecutionGraph), scheduledExecutorService);
}
// Returns the current JobStatus of the execution graph (already completed future).
@Override
public CompletableFuture requestJobStatus(Time timeout) {
return CompletableFuture.completedFuture(executionGraph.getState());
}
// Returns an archived (immutable) snapshot of the current execution graph.
@Override
public CompletableFuture requestJob(Time timeout) {
return CompletableFuture.completedFuture(ArchivedExecutionGraph.createFrom(executionGraph));
}
/**
 * Triggers a savepoint for the job, optionally cancelling the job afterwards.
 * Fails fast when the job has no checkpoint coordinator (batch job) or when no
 * target directory is given and no cluster-wide default savepoint directory is
 * configured.
 *
 * @param targetDirectory directory to store the savepoint in, or null to use the default
 * @param cancelJob whether to stop the checkpoint scheduler before and cancel the job after
 * @param timeout used for the cancel call issued after a successful savepoint
 * @return future completed with the external savepoint path
 */
@Override
public CompletableFuture triggerSavepoint(
@Nullable final String targetDirectory,
final boolean cancelJob,
final Time timeout) {
final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
if (checkpointCoordinator == null) {
return FutureUtils.completedExceptionally(new IllegalStateException(
String.format("Job %s is not a streaming job.", jobGraph.getJobID())));
} else if (targetDirectory == null && !checkpointCoordinator.getCheckpointStorage().hasDefaultSavepointLocation()) {
log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());
return FutureUtils.completedExceptionally(new IllegalStateException(
"No savepoint directory configured. You can either specify a directory " +
"while cancelling via -s :targetDirectory or configure a cluster-wide " +
"default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'."));
}
if (cancelJob) {
// Stop periodic checkpoints so none races with / supersedes the savepoint.
checkpointCoordinator.stopCheckpointScheduler();
}
return checkpointCoordinator
.triggerSavepoint(System.currentTimeMillis(), targetDirectory)
.thenApply(CompletedCheckpoint::getExternalPointer)
.handleAsync((path, throwable) -> {
if (throwable != null) {
if (cancelJob) {
// Savepoint failed: resume periodic checkpointing that we stopped above.
startCheckpointScheduler(checkpointCoordinator);
}
throw new CompletionException(throwable);
} else if (cancelJob) {
log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
cancel(timeout);
}
return path;
}, getMainThreadExecutor());
}
// (Re)starts the periodic checkpoint scheduler, if periodic checkpointing is
// configured at all. A concurrent shutdown of the coordinator surfaces as an
// IllegalStateException, which is deliberately ignored.
private void startCheckpointScheduler(final CheckpointCoordinator checkpointCoordinator) {
if (!checkpointCoordinator.isPeriodicCheckpointingConfigured()) {
return;
}
try {
checkpointCoordinator.startCheckpointScheduler();
} catch (IllegalStateException ignored) {
// Coordinator was shut down concurrently; nothing to restart.
}
}
// Returns back-pressure stats for the given job vertex, or a response wrapping
// null when no stats are currently available. Fails the future when the vertex
// id is unknown to the execution graph.
@Override
public CompletableFuture requestOperatorBackPressureStats(final JobVertexID jobVertexId) {
final ExecutionJobVertex jobVertex = executionGraph.getJobVertex(jobVertexId);
if (jobVertex == null) {
return FutureUtils.completedExceptionally(new FlinkException("JobVertexID not found " +
jobVertexId));
}
final Optional operatorBackPressureStats =
backPressureStatsTracker.getOperatorBackPressureStats(jobVertex);
// orElse(null): the response type models "no stats yet" with a null payload.
return CompletableFuture.completedFuture(OperatorBackPressureStatsResponse.of(
operatorBackPressureStats.orElse(null)));
}
// Notification (from the ResourceManager) that a slot allocation failed;
// delegates to the internal allocation-failure path.
@Override
public void notifyAllocationFailure(AllocationID allocationID, Exception cause) {
internalFailAllocation(allocationID, cause);
}
//----------------------------------------------------------------------------------------------
// Internal methods
//----------------------------------------------------------------------------------------------
//-- job starting and stopping -----------------------------------------------------------------
// Starts (or restarts) job execution under the given JobMasterId (fencing token).
// Idempotent for a repeated start with the same id. Otherwise installs the new
// fencing token (suspending a previous execution if one was running), starts the
// JobMaster services and (re)schedules the execution graph.
// Must run in the main thread.
private Acknowledge startJobExecution(JobMasterId newJobMasterId) throws Exception {
validateRunsInMainThread();
checkNotNull(newJobMasterId, "The new JobMasterId must not be null.");
if (Objects.equals(getFencingToken(), newJobMasterId)) {
log.info("Already started the job execution with JobMasterId {}.", newJobMasterId);
return Acknowledge.get();
}
setNewFencingToken(newJobMasterId);
startJobMasterServices();
log.info("Starting execution of job {} ({})", jobGraph.getName(), jobGraph.getJobID());
resetAndScheduleExecutionGraph();
return Acknowledge.get();
}
// Starts the services the JobMaster needs to execute the job: the slot pool
// (bound to the current fencing token) and ResourceManager leader retrieval.
private void startJobMasterServices() throws Exception {
// start the slot pool make sure the slot pool now accepts messages for this leader
slotPool.start(getFencingToken(), getAddress());
//TODO: Remove once the ZooKeeperLeaderRetrieval returns the stored address upon start
// try to reconnect to previously known leader
reconnectToResourceManager(new FlinkException("Starting JobMaster component."));
// job is ready to go, try to establish connection with resource manager
// - activate leader retrieval for the resource manager
// - on notification of the leader, the connection will be established and
// the slot pool will start requesting slots
resourceManagerLeaderRetriever.start(new ResourceManagerLeaderListener());
}
// Installs a new fencing token (JobMasterId). If an execution is already running
// under an old token, it is suspended first — order matters: suspendExecution
// reads and then clears the old token before the new one is set.
private void setNewFencingToken(JobMasterId newJobMasterId) {
if (getFencingToken() != null) {
log.info("Restarting old job with JobMasterId {}. The new JobMasterId is {}.", getFencingToken(), newJobMasterId);
// first we have to suspend the current execution
suspendExecution(new FlinkException("Old job with JobMasterId " + getFencingToken() +
" is restarted with a new JobMasterId " + newJobMasterId + '.'));
}
// set new leader id
setFencingToken(newJobMasterId);
}
/**
 * Suspends the job: all running tasks will be cancelled, and communication with other components
 * will be disposed.
 *
 * In most cases the job is suspended because its leadership has been revoked; the job can be
 * restarted by calling the {@link #start(JobMasterId, Time)} method once we take the leadership
 * back again.
 *
 * @param cause The reason why this job has been suspended.
 */
private Acknowledge suspendExecution(final Exception cause) {
validateRunsInMainThread();
if (getFencingToken() == null) {
// Idempotent: a second suspend (or suspend after shutdown) is a no-op.
log.debug("Job has already been suspended or shutdown.");
return Acknowledge.get();
}
// not leader anymore --> set the JobMasterId to null
setFencingToken(null);
try {
resourceManagerLeaderRetriever.stop();
} catch (Throwable t) {
// Best effort: a failing leader retriever must not prevent the suspension.
log.warn("Failed to stop resource manager leader retriever when suspending.", t);
}
suspendAndClearExecutionGraphFields(cause);
// the slot pool stops receiving messages and clears its pooled slots
slotPoolGateway.suspend();
// disconnect from resource manager:
closeResourceManagerConnection(cause);
return Acknowledge.get();
}
// Installs a new execution graph and its metric group. Preconditions enforce
// that the previous graph is terminal and its metric group has been cleared
// (see clearExecutionGraphFields) before replacement. Main-thread only.
private void assignExecutionGraph(
ExecutionGraph newExecutionGraph,
JobManagerJobMetricGroup newJobManagerJobMetricGroup) {
validateRunsInMainThread();
checkState(executionGraph.getState().isTerminalState());
checkState(jobManagerJobMetricGroup == null);
executionGraph = newExecutionGraph;
jobManagerJobMetricGroup = newJobManagerJobMetricGroup;
}
// Schedules the execution graph for execution. If the current graph has already
// run (state != CREATED), it is suspended and a fresh graph is built and swapped
// in once the old graph's termination future completes; scheduling then runs on
// the new graph. Main-thread only.
private void resetAndScheduleExecutionGraph() throws Exception {
validateRunsInMainThread();
final CompletableFuture executionGraphAssignedFuture;
if (executionGraph.getState() == JobStatus.CREATED) {
// Fresh graph: nothing to reset, schedule directly.
executionGraphAssignedFuture = CompletableFuture.completedFuture(null);
} else {
suspendAndClearExecutionGraphFields(new FlinkException("ExecutionGraph is being reset in order to be rescheduled."));
final JobManagerJobMetricGroup newJobManagerJobMetricGroup = jobMetricGroupFactory.create(jobGraph);
final ExecutionGraph newExecutionGraph = createAndRestoreExecutionGraph(newJobManagerJobMetricGroup);
// Swap in the new graph only after the old one has fully terminated,
// on the main thread (handleAsync ignores the old graph's outcome).
executionGraphAssignedFuture = executionGraph.getTerminationFuture().handleAsync(
(JobStatus ignored, Throwable throwable) -> {
assignExecutionGraph(newExecutionGraph, newJobManagerJobMetricGroup);
return null;
},
getMainThreadExecutor());
}
executionGraphAssignedFuture.thenRun(this::scheduleExecutionGraph);
}
// Registers this JobMaster as job status listener and kicks off scheduling.
// Any scheduling failure is converted into a global failure of the graph so the
// configured restart strategy / failure handling takes over.
private void scheduleExecutionGraph() {
checkState(jobStatusListener == null);
// register self as job status change listener
jobStatusListener = new JobManagerJobStatusListener();
executionGraph.registerJobStatusListener(jobStatusListener);
try {
executionGraph.scheduleForExecution();
}
catch (Throwable t) {
executionGraph.failGlobal(t);
}
}
// Builds a new execution graph and restores its state: first from the latest
// checkpoint if one exists; failing that, from the job's configured savepoint
// restore settings (if any).
private ExecutionGraph createAndRestoreExecutionGraph(JobManagerJobMetricGroup currentJobManagerJobMetricGroup) throws Exception {
ExecutionGraph newExecutionGraph = createExecutionGraph(currentJobManagerJobMetricGroup);
final CheckpointCoordinator checkpointCoordinator = newExecutionGraph.getCheckpointCoordinator();
if (checkpointCoordinator != null) {
// check whether we find a valid checkpoint
if (!checkpointCoordinator.restoreLatestCheckpointedState(
newExecutionGraph.getAllVertices(),
false,
false)) {
// check whether we can restore from a savepoint
tryRestoreExecutionGraphFromSavepoint(newExecutionGraph, jobGraph.getSavepointRestoreSettings());
}
}
return newExecutionGraph;
}
// Builds a fresh ExecutionGraph for this job from the JobGraph and the
// JobMaster's services/configuration via ExecutionGraphBuilder.
// First argument (prior graph to reuse) is deliberately null: always build anew.
private ExecutionGraph createExecutionGraph(JobManagerJobMetricGroup currentJobManagerJobMetricGroup) throws JobExecutionException, JobException {
return ExecutionGraphBuilder.buildGraph(
null,
jobGraph,
jobMasterConfiguration.getConfiguration(),
scheduledExecutorService,
scheduledExecutorService,
slotPool.getSlotProvider(),
userCodeLoader,
highAvailabilityServices.getCheckpointRecoveryFactory(),
rpcTimeout,
restartStrategy,
currentJobManagerJobMetricGroup,
blobServer,
jobMasterConfiguration.getSlotRequestTimeout(),
log);
}
// Suspends the current execution graph and then clears the fields tied to it
// (metric group, status listener) so a new graph can be assigned afterwards.
private void suspendAndClearExecutionGraphFields(Exception cause) {
suspendExecutionGraph(cause);
clearExecutionGraphFields();
}
// Suspends the execution graph with the given cause and tears down the
// resources bound to it: closes the job metric group and stops the job status
// listener (both null-checked since they may not have been created yet).
private void suspendExecutionGraph(Exception cause) {
executionGraph.suspend(cause);
if (jobManagerJobMetricGroup != null) {
jobManagerJobMetricGroup.close();
}
if (jobStatusListener != null) {
jobStatusListener.stop();
}
}
// Drops the references tied to the current execution graph so that
// assignExecutionGraph's preconditions hold for the next graph.
private void clearExecutionGraphFields() {
jobStatusListener = null;
jobManagerJobMetricGroup = null;
}
/**
 * Dispose the savepoint stored under the given path.
 *
 * <p>Failures are logged (info level) but not propagated — disposal is best effort.
 *
 * @param savepointPath path where the savepoint is stored
 */
private void disposeSavepoint(String savepointPath) {
try {
// delete the temporary savepoint
Checkpoints.disposeSavepoint(
savepointPath,
jobMasterConfiguration.getConfiguration(),
userCodeLoader,
log);
} catch (FlinkException | IOException e) {
log.info("Could not dispose temporary rescaling savepoint under {}.", savepointPath, e);
}
}
/**
 * Tries to restore the given {@link ExecutionGraph} from the provided {@link SavepointRestoreSettings}.
 *
 * <p>No-op when the settings do not request a savepoint restore, or when the graph
 * has no checkpoint coordinator.
 *
 * @param executionGraphToRestore {@link ExecutionGraph} which is supposed to be restored
 * @param savepointRestoreSettings {@link SavepointRestoreSettings} containing information about the savepoint to restore from
 * @throws Exception if the {@link ExecutionGraph} could not be restored
 */
private void tryRestoreExecutionGraphFromSavepoint(ExecutionGraph executionGraphToRestore, SavepointRestoreSettings savepointRestoreSettings) throws Exception {
if (savepointRestoreSettings.restoreSavepoint()) {
final CheckpointCoordinator checkpointCoordinator = executionGraphToRestore.getCheckpointCoordinator();
if (checkpointCoordinator != null) {
checkpointCoordinator.restoreSavepoint(
savepointRestoreSettings.getRestorePath(),
savepointRestoreSettings.allowNonRestoredState(),
executionGraphToRestore.getAllVertices(),
userCodeLoader);
}
}
}
//----------------------------------------------------------------------------------------------
// Routes JobMaster-internal errors: JVM-fatal errors go to the fatal error
// handler (after being logged); everything else is reported as a JobMaster
// failure through the job completion actions.
private void handleJobMasterError(final Throwable cause) {
if (!ExceptionUtils.isJvmFatalError(cause)) {
jobCompletionActions.jobMasterFailed(cause);
} else {
log.error("Fatal error occurred on JobManager.", cause);
// The handler's contract requires this call to be non-blocking.
fatalErrorHandler.onFatalError(cause);
}
}
// Reacts to job status changes. On a globally terminal state, an archived
// snapshot of the graph is taken on the main thread and the completion actions
// are notified asynchronously. Main-thread only.
private void jobStatusChanged(
final JobStatus newJobStatus,
long timestamp,
@Nullable final Throwable error) {
validateRunsInMainThread();
if (newJobStatus.isGloballyTerminalState()) {
// Snapshot on the main thread; hand the immutable archive to the async callback.
final ArchivedExecutionGraph archivedExecutionGraph = ArchivedExecutionGraph.createFrom(executionGraph);
scheduledExecutorService.execute(() -> jobCompletionActions.jobReachedGloballyTerminalState(archivedExecutionGraph));
}
}
// Records the new ResourceManager leader address (may become null when leadership
// is lost) and triggers a reconnect — which closes any old connection and only
// dials out again if an address is known.
private void notifyOfNewResourceManagerLeader(final String newResourceManagerAddress, final ResourceManagerId resourceManagerId) {
resourceManagerAddress = createResourceManagerAddress(newResourceManagerAddress, resourceManagerId);
reconnectToResourceManager(new FlinkException(String.format("ResourceManager leader changed to new address %s", resourceManagerAddress)));
}
// Wraps address + leader id into a ResourceManagerAddress, or returns null when
// no address is known. Contract: address == null <=> id == null.
@Nullable
private ResourceManagerAddress createResourceManagerAddress(@Nullable String newResourceManagerAddress, @Nullable ResourceManagerId resourceManagerId) {
if (newResourceManagerAddress == null) {
return null;
}
// A non-null address must come with a non-null leader id.
checkNotNull(resourceManagerId);
return new ResourceManagerAddress(newResourceManagerAddress, resourceManagerId);
}
// Tears down the current ResourceManager connection (if any) and attempts a new
// one against the currently known leader address. Close must happen first.
private void reconnectToResourceManager(Exception cause) {
closeResourceManagerConnection(cause);
tryConnectToResourceManager();
}
// Connects to the ResourceManager if a leader address is currently known;
// otherwise does nothing (the leader listener will trigger a connect later).
private void tryConnectToResourceManager() {
if (resourceManagerAddress == null) {
return;
}
connectToResourceManager();
}
// Opens a new registration connection to the current ResourceManager leader.
// Asserts document the expected state: an address is known and no connection
// (pending or established) exists yet.
private void connectToResourceManager() {
assert(resourceManagerAddress != null);
assert(resourceManagerConnection == null);
assert(establishedResourceManagerConnection == null);
log.info("Connecting to ResourceManager {}", resourceManagerAddress);
resourceManagerConnection = new ResourceManagerConnection(
log,
jobGraph.getJobID(),
resourceId,
getAddress(),
getFencingToken(),
resourceManagerAddress.getAddress(),
resourceManagerAddress.getResourceManagerId(),
scheduledExecutorService);
resourceManagerConnection.start();
}
// Finalizes a successful ResourceManager registration: records the established
// connection, connects the slot pool to the RM gateway, and starts heartbeat
// monitoring. Responses from outdated/duplicate registration attempts (leader id
// no longer matching the pending connection) are ignored.
private void establishResourceManagerConnection(final JobMasterRegistrationSuccess success) {
final ResourceManagerId resourceManagerId = success.getResourceManagerId();
// verify the response with current connection
if (resourceManagerConnection != null
&& Objects.equals(resourceManagerConnection.getTargetLeaderId(), resourceManagerId)) {
log.info("JobManager successfully registered at ResourceManager, leader id: {}.", resourceManagerId);
final ResourceManagerGateway resourceManagerGateway = resourceManagerConnection.getTargetGateway();
final ResourceID resourceManagerResourceId = success.getResourceManagerResourceId();
establishedResourceManagerConnection = new EstablishedResourceManagerConnection(
resourceManagerGateway,
resourceManagerResourceId);
slotPoolGateway.connectToResourceManager(resourceManagerGateway);
resourceManagerHeartbeatManager.monitorTarget(resourceManagerResourceId, new HeartbeatTarget() {
@Override
public void receiveHeartbeat(ResourceID resourceID, Void payload) {
// Answer the RM's heartbeat by reporting back to it.
resourceManagerGateway.heartbeatFromJobManager(resourceID);
}
@Override
public void requestHeartbeat(ResourceID resourceID, Void payload) {
// request heartbeat will never be called on the job manager side
}
});
} else {
log.debug("Ignoring resource manager connection to {} because its a duplicate or outdated.", resourceManagerId);
}
}
// Closes both flavors of ResourceManager connection: an established one is
// dissolved (heartbeats stopped, gateway notified), and a still-pending
// registration is cancelled. Both fields are reset to null.
private void closeResourceManagerConnection(Exception cause) {
if (establishedResourceManagerConnection != null) {
dissolveResourceManagerConnection(establishedResourceManagerConnection, cause);
establishedResourceManagerConnection = null;
}
if (resourceManagerConnection != null) {
// stop a potentially ongoing registration process
resourceManagerConnection.close();
resourceManagerConnection = null;
}
}
// Tears down an established ResourceManager connection: stops heartbeat
// monitoring, tells the RM to disconnect this job, and detaches the slot pool.
// Full stack trace only at debug level; info logs just the cause message.
private void dissolveResourceManagerConnection(EstablishedResourceManagerConnection establishedResourceManagerConnection, Exception cause) {
final ResourceID resourceManagerResourceID = establishedResourceManagerConnection.getResourceManagerResourceID();
if (log.isDebugEnabled()) {
log.debug("Close ResourceManager connection {}.", resourceManagerResourceID, cause);
} else {
log.info("Close ResourceManager connection {}: {}.", resourceManagerResourceID, cause.getMessage());
}
resourceManagerHeartbeatManager.unmonitorTarget(resourceManagerResourceID);
ResourceManagerGateway resourceManagerGateway = establishedResourceManagerConnection.getResourceManagerGateway();
resourceManagerGateway.disconnectJobManager(jobGraph.getJobID(), cause);
slotPoolGateway.disconnectResourceManager();
}
/**
 * Restore the given {@link ExecutionGraph} from the rescaling savepoint. If the {@link ExecutionGraph} could
 * be restored, then this savepoint will be recorded as the latest successful modification savepoint. A previous
 * savepoint will be disposed. If the rescaling savepoint is empty, the job will be restored from the initially
 * provided savepoint.
 *
 * @param newExecutionGraph to restore
 * @param savepointFuture containing the path to the internal modification savepoint
 * @return Future which is completed with the restored {@link ExecutionGraph}
 */
private CompletableFuture restoreExecutionGraphFromRescalingSavepoint(ExecutionGraph newExecutionGraph, CompletableFuture savepointFuture) {
return savepointFuture
.thenApplyAsync(
(@Nullable String savepointPath) -> {
if (savepointPath != null) {
try {
tryRestoreExecutionGraphFromSavepoint(newExecutionGraph, SavepointRestoreSettings.forPath(savepointPath, false));
} catch (Exception e) {
final String message = String.format("Could not restore from temporary rescaling savepoint. This might indicate " +
"that the savepoint %s got corrupted. Deleting this savepoint as a precaution.",
savepointPath);
// NOTE(review): the exception e is only carried in the thrown
// CompletionException below, not in this log line — consider log.info(message, e).
log.info(message);
// Clear the lastInternalSavepoint reference on the main thread
// (field is main-thread-confined), then dispose the file on the I/O executor.
CompletableFuture
.runAsync(
() -> {
if (savepointPath.equals(lastInternalSavepoint)) {
lastInternalSavepoint = null;
}
},
getMainThreadExecutor())
.thenRunAsync(
() -> disposeSavepoint(savepointPath),
scheduledExecutorService);
throw new CompletionException(new JobModificationException(message, e));
}
} else {
// No rescaling savepoint, restart from the initial savepoint or none
try {
tryRestoreExecutionGraphFromSavepoint(newExecutionGraph, jobGraph.getSavepointRestoreSettings());
} catch (Exception e) {
final String message = String.format("Could not restore from initial savepoint. This might indicate " +
"that the savepoint %s got corrupted.", jobGraph.getSavepointRestoreSettings().getRestorePath());
log.info(message);
throw new CompletionException(new JobModificationException(message, e));
}
}
return newExecutionGraph;
}, scheduledExecutorService);
}
/**
 * Takes an internal savepoint for job modification purposes. If the savepoint was not successful because
 * not all tasks were running, it returns the last successful modification savepoint.
 *
 * @param timeout for the operation
 * @return Future which is completed with the savepoint path or the last successful modification savepoint if the
 * former was not successful
 */
private CompletableFuture getJobModificationSavepoint(Time timeout) {
return triggerSavepoint(
null,
false,
timeout)
.handleAsync(
(String savepointPath, Throwable throwable) -> {
if (throwable != null) {
final Throwable strippedThrowable = ExceptionUtils.stripCompletionException(throwable);
if (strippedThrowable instanceof CheckpointTriggerException) {
final CheckpointTriggerException checkpointTriggerException = (CheckpointTriggerException) strippedThrowable;
// Only NOT_ALL_REQUIRED_TASKS_RUNNING is tolerated: fall back to the
// last successful internal savepoint. Other decline reasons propagate.
if (checkpointTriggerException.getCheckpointDeclineReason() == CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING) {
return lastInternalSavepoint;
} else {
throw new CompletionException(checkpointTriggerException);
}
} else {
throw new CompletionException(strippedThrowable);
}
} else {
// Success: remember the new savepoint and dispose the previous one asynchronously.
final String savepointToDispose = lastInternalSavepoint;
lastInternalSavepoint = savepointPath;
if (savepointToDispose != null) {
// dispose the old savepoint asynchronously
CompletableFuture.runAsync(
() -> disposeSavepoint(savepointToDispose),
scheduledExecutorService);
}
return lastInternalSavepoint;
}
},
getMainThreadExecutor());
}
/**
 * Rescales the given operators of the {@link JobGraph} of this {@link JobMaster} with respect to given
 * parallelism and {@link RescalingBehaviour}.
 *
 * @param operators to rescale
 * @param newParallelism new parallelism for these operators
 * @param rescalingBehaviour of the rescaling operation
 * @throws FlinkException if the {@link JobGraph} could not be rescaled
 */
private void rescaleJobGraph(Collection operators, int newParallelism, RescalingBehaviour rescalingBehaviour) throws FlinkException {
for (JobVertexID jobVertexId : operators) {
final JobVertex jobVertex = jobGraph.findVertexByID(jobVertexId);
// update max parallelism in case that it has not been configured
final ExecutionJobVertex executionJobVertex = executionGraph.getJobVertex(jobVertexId);
if (executionJobVertex != null) {
jobVertex.setMaxParallelism(executionJobVertex.getMaxParallelism());
}
// Applies the new parallelism to the vertex per the requested behaviour.
rescalingBehaviour.accept(jobVertex, newParallelism);
}
}
//----------------------------------------------------------------------------------------------
// Utility classes
//----------------------------------------------------------------------------------------------
// Listens for ResourceManager leader changes and forwards them (on the RPC main
// thread via runAsync) to the JobMaster. Leader-service errors are treated as
// JobMaster errors.
private class ResourceManagerLeaderListener implements LeaderRetrievalListener {
@Override
public void notifyLeaderAddress(final String leaderAddress, final UUID leaderSessionID) {
runAsync(
() -> notifyOfNewResourceManagerLeader(
leaderAddress,
ResourceManagerId.fromUuidOrNull(leaderSessionID)));
}
@Override
public void handleError(final Exception exception) {
handleJobMasterError(new Exception("Fatal error in the ResourceManager leader service", exception));
}
}
//----------------------------------------------------------------------------------------------
// Retrying RPC connection from this JobMaster to a ResourceManager leader.
// Registers the JobManager at the RM; on success the connection is established
// (filtered against outdated attempts), on failure the error is escalated.
// NOTE(review): generic type parameters on RegisteredRpcConnection and
// RetryingRegistration appear stripped in this copy — confirm against upstream.
private class ResourceManagerConnection
extends RegisteredRpcConnection {
// Job this connection registers for.
private final JobID jobID;
// Resource id of this JobManager process.
private final ResourceID jobManagerResourceID;
// RPC address of this JobMaster, sent to the RM.
private final String jobManagerRpcAddress;
// Fencing token under which the registration is made.
private final JobMasterId jobMasterId;
ResourceManagerConnection(
final Logger log,
final JobID jobID,
final ResourceID jobManagerResourceID,
final String jobManagerRpcAddress,
final JobMasterId jobMasterId,
final String resourceManagerAddress,
final ResourceManagerId resourceManagerId,
final Executor executor) {
super(log, resourceManagerAddress, resourceManagerId, executor);
this.jobID = checkNotNull(jobID);
this.jobManagerResourceID = checkNotNull(jobManagerResourceID);
this.jobManagerRpcAddress = checkNotNull(jobManagerRpcAddress);
this.jobMasterId = checkNotNull(jobMasterId);
}
@Override
protected RetryingRegistration generateRegistration() {
return new RetryingRegistration(
log, getRpcService(), "ResourceManager", ResourceManagerGateway.class,
getTargetAddress(), getTargetLeaderId()) {
@Override
protected CompletableFuture invokeRegistration(
ResourceManagerGateway gateway, ResourceManagerId fencingToken, long timeoutMillis) {
Time timeout = Time.milliseconds(timeoutMillis);
return gateway.registerJobManager(
jobMasterId,
jobManagerResourceID,
jobManagerRpcAddress,
jobID,
timeout);
}
};
}
@Override
protected void onRegistrationSuccess(final JobMasterRegistrationSuccess success) {
runAsync(() -> {
// filter out outdated connections
//noinspection ObjectEquality
if (this == resourceManagerConnection) {
establishResourceManagerConnection(success);
}
});
}
@Override
protected void onRegistrationFailure(final Throwable failure) {
handleJobMasterError(failure);
}
}
//----------------------------------------------------------------------------------------------
// Forwards job status changes of the execution graph to the JobMaster's RPC
// main thread. Can be stopped (e.g. when the graph is suspended) so stale
// notifications are dropped; "running" is volatile since stop() and
// jobStatusChanges() may run on different threads.
private class JobManagerJobStatusListener implements JobStatusListener {
private volatile boolean running = true;
@Override
public void jobStatusChanges(
final JobID jobId,
final JobStatus newJobStatus,
final long timestamp,
final Throwable error) {
if (running) {
// run in rpc thread to avoid concurrency
runAsync(() -> jobStatusChanged(newJobStatus, timestamp, error));
}
}
private void stop() {
running = false;
}
}
// Heartbeat callbacks for monitored TaskManagers: a timeout disconnects the
// TaskManager (via the self gateway so the call goes through RPC fencing), and
// heartbeat payloads (accumulator snapshots) are folded into the execution graph.
private class TaskManagerHeartbeatListener implements HeartbeatListener {
private final JobMasterGateway jobMasterGateway;
private TaskManagerHeartbeatListener(JobMasterGateway jobMasterGateway) {
this.jobMasterGateway = Preconditions.checkNotNull(jobMasterGateway);
}
@Override
public void notifyHeartbeatTimeout(ResourceID resourceID) {
jobMasterGateway.disconnectTaskManager(
resourceID,
new TimeoutException("Heartbeat of TaskManager with id " + resourceID + " timed out."));
}
@Override
public void reportPayload(ResourceID resourceID, AccumulatorReport payload) {
for (AccumulatorSnapshot snapshot : payload.getAccumulatorSnapshots()) {
executionGraph.updateAccumulators(snapshot);
}
}
@Override
public CompletableFuture retrievePayload(ResourceID resourceID) {
// JobMaster sends no payload with its heartbeats to TaskManagers.
return CompletableFuture.completedFuture(null);
}
}
// Heartbeat callbacks for the monitored ResourceManager: a timeout triggers a
// reconnect (on the main thread, and only if the timed-out RM is still the
// currently established one). No payloads are exchanged in either direction.
private class ResourceManagerHeartbeatListener implements HeartbeatListener {
@Override
public void notifyHeartbeatTimeout(final ResourceID resourceId) {
runAsync(() -> {
log.info("The heartbeat of ResourceManager with id {} timed out.", resourceId);
// Ignore timeouts from an RM we are no longer connected to.
if (establishedResourceManagerConnection != null && establishedResourceManagerConnection.getResourceManagerResourceID().equals(resourceId)) {
reconnectToResourceManager(
new JobMasterException(
String.format("The heartbeat of ResourceManager with id %s timed out.", resourceId)));
}
});
}
@Override
public void reportPayload(ResourceID resourceID, Void payload) {
// nothing to do since the payload is of type Void
}
@Override
public CompletableFuture retrievePayload(ResourceID resourceID) {
return CompletableFuture.completedFuture(null);
}
}
// Exposes the configured restart strategy for tests only.
@VisibleForTesting
RestartStrategy getRestartStrategy() {
return restartStrategy;
}
// Exposes the current execution graph for tests only.
@VisibleForTesting
ExecutionGraph getExecutionGraph() {
return executionGraph;
}
}