/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmaster;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.JobStatus;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.queryablestate.KvStateID;
import org.apache.flink.runtime.accumulators.AccumulatorSnapshot;
import org.apache.flink.runtime.blob.BlobWriter;
import org.apache.flink.runtime.checkpoint.CheckpointMetrics;
import org.apache.flink.runtime.checkpoint.TaskStateSnapshot;
import org.apache.flink.runtime.clusterframework.types.AllocationID;
import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.runtime.concurrent.FutureUtils;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.executiongraph.JobStatusListener;
import org.apache.flink.runtime.heartbeat.HeartbeatListener;
import org.apache.flink.runtime.heartbeat.HeartbeatManager;
import org.apache.flink.runtime.heartbeat.HeartbeatServices;
import org.apache.flink.runtime.heartbeat.HeartbeatTarget;
import org.apache.flink.runtime.heartbeat.NoOpHeartbeatManager;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.io.network.partition.JobMasterPartitionTracker;
import org.apache.flink.runtime.io.network.partition.PartitionTrackerFactory;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.runtime.jobmanager.OnCompletionActions;
import org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException;
import org.apache.flink.runtime.jobmaster.factories.JobManagerJobMetricGroupFactory;
import org.apache.flink.runtime.jobmaster.slotpool.Scheduler;
import org.apache.flink.runtime.jobmaster.slotpool.SchedulerFactory;
import org.apache.flink.runtime.jobmaster.slotpool.SlotPool;
import org.apache.flink.runtime.jobmaster.slotpool.SlotPoolFactory;
import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener;
import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
import org.apache.flink.runtime.messages.Acknowledge;
import org.apache.flink.runtime.messages.FlinkJobNotFoundException;
import org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint;
import org.apache.flink.runtime.messages.webmonitor.JobDetails;
import org.apache.flink.runtime.metrics.groups.JobManagerJobMetricGroup;
import org.apache.flink.runtime.operators.coordination.CoordinationRequest;
import org.apache.flink.runtime.operators.coordination.CoordinationResponse;
import org.apache.flink.runtime.operators.coordination.OperatorEvent;
import org.apache.flink.runtime.query.KvStateLocation;
import org.apache.flink.runtime.query.UnknownKvStateLocation;
import org.apache.flink.runtime.registration.RegisteredRpcConnection;
import org.apache.flink.runtime.registration.RegistrationResponse;
import org.apache.flink.runtime.registration.RetryingRegistration;
import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway;
import org.apache.flink.runtime.resourcemanager.ResourceManagerId;
import org.apache.flink.runtime.rest.handler.legacy.backpressure.BackPressureStatsTracker;
import org.apache.flink.runtime.rest.handler.legacy.backpressure.OperatorBackPressureStats;
import org.apache.flink.runtime.rest.handler.legacy.backpressure.OperatorBackPressureStatsResponse;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.FencedRpcEndpoint;
import org.apache.flink.runtime.rpc.RpcService;
import org.apache.flink.runtime.rpc.RpcUtils;
import org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils;
import org.apache.flink.runtime.scheduler.SchedulerNG;
import org.apache.flink.runtime.scheduler.SchedulerNGFactory;
import org.apache.flink.runtime.shuffle.ShuffleMaster;
import org.apache.flink.runtime.state.KeyGroupRange;
import org.apache.flink.runtime.taskexecutor.AccumulatorReport;
import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
import org.apache.flink.runtime.taskexecutor.slot.SlotOffer;
import org.apache.flink.runtime.taskmanager.TaskExecutionState;
import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
import org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkException;
import org.apache.flink.util.InstantiationUtil;
import org.apache.flink.util.SerializedValue;
import org.slf4j.Logger;
import javax.annotation.Nullable;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeoutException;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;
/**
* JobMaster implementation. The job master is responsible for the execution of a single
* {@link JobGraph}.
*
* <p>It offers the following methods as part of its rpc interface to interact with the JobMaster
* remotely:
* <ul>
* <li>{@link #updateTaskExecutionState} updates the task execution state for a
* given task</li>
* </ul>
*
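* <p>A minimal usage sketch, assuming an already constructed but not yet started {@code JobMaster}
* instance (the variable names below are illustrative only):
* <pre>{@code
* // the fencing token is normally derived from the granted leader session id
* JobMasterId jobMasterId = JobMasterId.generate();
* jobMaster.start(jobMasterId).get();   // start the RPC endpoint and begin running the job
* // later, e.g. when the leadership is revoked:
* jobMaster.suspend(new FlinkException("Leadership revoked.")).get();
* }</pre>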
*/
public class JobMaster extends FencedRpcEndpoint<JobMasterId> implements JobMasterGateway, JobMasterService {
/** Default names for Flink's distributed components. */
public static final String JOB_MANAGER_NAME = "jobmanager";
// ------------------------------------------------------------------------
private final JobMasterConfiguration jobMasterConfiguration;
private final ResourceID resourceId;
private final JobGraph jobGraph;
private final Time rpcTimeout;
private final HighAvailabilityServices highAvailabilityServices;
private final BlobWriter blobWriter;
private final HeartbeatServices heartbeatServices;
private final JobManagerJobMetricGroupFactory jobMetricGroupFactory;
private final ScheduledExecutorService scheduledExecutorService;
private final OnCompletionActions jobCompletionActions;
private final FatalErrorHandler fatalErrorHandler;
private final ClassLoader userCodeLoader;
private final SlotPool slotPool;
private final Scheduler scheduler;
private final SchedulerNGFactory schedulerNGFactory;
// --------- BackPressure --------
private final BackPressureStatsTracker backPressureStatsTracker;
// --------- ResourceManager --------
private final LeaderRetrievalService resourceManagerLeaderRetriever;
// --------- TaskManagers --------
private final Map<ResourceID, Tuple2<TaskManagerLocation, TaskExecutorGateway>> registeredTaskManagers;
private final ShuffleMaster<?> shuffleMaster;
// -------- Mutable fields ---------
private HeartbeatManager<AccumulatorReport, AllocatedSlotReport> taskManagerHeartbeatManager;
private HeartbeatManager<Void, Void> resourceManagerHeartbeatManager;
private SchedulerNG schedulerNG;
@Nullable
private JobManagerJobStatusListener jobStatusListener;
@Nullable
private JobManagerJobMetricGroup jobManagerJobMetricGroup;
@Nullable
private ResourceManagerAddress resourceManagerAddress;
@Nullable
private ResourceManagerConnection resourceManagerConnection;
@Nullable
private EstablishedResourceManagerConnection establishedResourceManagerConnection;
private Map<String, Object> accumulators;
private final JobMasterPartitionTracker partitionTracker;
// ------------------------------------------------------------------------
public JobMaster(
RpcService rpcService,
JobMasterConfiguration jobMasterConfiguration,
ResourceID resourceId,
JobGraph jobGraph,
HighAvailabilityServices highAvailabilityService,
SlotPoolFactory slotPoolFactory,
SchedulerFactory schedulerFactory,
JobManagerSharedServices jobManagerSharedServices,
HeartbeatServices heartbeatServices,
JobManagerJobMetricGroupFactory jobMetricGroupFactory,
OnCompletionActions jobCompletionActions,
FatalErrorHandler fatalErrorHandler,
ClassLoader userCodeLoader,
SchedulerNGFactory schedulerNGFactory,
ShuffleMaster<?> shuffleMaster,
PartitionTrackerFactory partitionTrackerFactory) throws Exception {
super(rpcService, AkkaRpcServiceUtils.createRandomName(JOB_MANAGER_NAME), null);
this.jobMasterConfiguration = checkNotNull(jobMasterConfiguration);
this.resourceId = checkNotNull(resourceId);
this.jobGraph = checkNotNull(jobGraph);
this.rpcTimeout = jobMasterConfiguration.getRpcTimeout();
this.highAvailabilityServices = checkNotNull(highAvailabilityService);
this.blobWriter = jobManagerSharedServices.getBlobWriter();
this.scheduledExecutorService = jobManagerSharedServices.getScheduledExecutorService();
this.jobCompletionActions = checkNotNull(jobCompletionActions);
this.fatalErrorHandler = checkNotNull(fatalErrorHandler);
this.userCodeLoader = checkNotNull(userCodeLoader);
this.schedulerNGFactory = checkNotNull(schedulerNGFactory);
this.heartbeatServices = checkNotNull(heartbeatServices);
this.jobMetricGroupFactory = checkNotNull(jobMetricGroupFactory);
final String jobName = jobGraph.getName();
final JobID jid = jobGraph.getJobID();
log.info("Initializing job {} ({}).", jobName, jid);
resourceManagerLeaderRetriever = highAvailabilityServices.getResourceManagerLeaderRetriever();
this.slotPool = checkNotNull(slotPoolFactory).createSlotPool(jobGraph.getJobID());
this.scheduler = checkNotNull(schedulerFactory).createScheduler(slotPool);
this.registeredTaskManagers = new HashMap<>(4);
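// The partition tracker resolves the TaskExecutor gateway for a given TaskManager through the
// registeredTaskManagers map; an unregistered TaskManager simply yields an empty Optional.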
this.partitionTracker = checkNotNull(partitionTrackerFactory)
.create(resourceID -> {
Tuple2<TaskManagerLocation, TaskExecutorGateway> taskManagerInfo = registeredTaskManagers.get(resourceID);
if (taskManagerInfo == null) {
return Optional.empty();
}
return Optional.of(taskManagerInfo.f1);
});
this.backPressureStatsTracker = checkNotNull(jobManagerSharedServices.getBackPressureStatsTracker());
this.shuffleMaster = checkNotNull(shuffleMaster);
this.jobManagerJobMetricGroup = jobMetricGroupFactory.create(jobGraph);
this.schedulerNG = createScheduler(jobManagerJobMetricGroup);
this.jobStatusListener = null;
this.resourceManagerConnection = null;
this.establishedResourceManagerConnection = null;
this.accumulators = new HashMap<>();
this.taskManagerHeartbeatManager = NoOpHeartbeatManager.getInstance();
this.resourceManagerHeartbeatManager = NoOpHeartbeatManager.getInstance();
}
private SchedulerNG createScheduler(final JobManagerJobMetricGroup jobManagerJobMetricGroup) throws Exception {
return schedulerNGFactory.createInstance(
log,
jobGraph,
backPressureStatsTracker,
scheduledExecutorService,
jobMasterConfiguration.getConfiguration(),
scheduler,
scheduledExecutorService,
userCodeLoader,
highAvailabilityServices.getCheckpointRecoveryFactory(),
rpcTimeout,
blobWriter,
jobManagerJobMetricGroup,
jobMasterConfiguration.getSlotRequestTimeout(),
shuffleMaster,
partitionTracker);
}
//----------------------------------------------------------------------------------------------
// Lifecycle management
//----------------------------------------------------------------------------------------------
/**
* Start the rpc service and begin to run the job.
*
* @param newJobMasterId The necessary fencing token to run the job
* @return Future acknowledge if the job could be started. Otherwise the future contains an exception
*/
public CompletableFuture<Acknowledge> start(final JobMasterId newJobMasterId) throws Exception {
// make sure we receive RPC and async calls
start();
return callAsyncWithoutFencing(() -> startJobExecution(newJobMasterId), RpcUtils.INF_TIMEOUT);
}
/**
* Suspends the job: all running tasks will be cancelled and the communication with other components
* will be disposed of.
*
* <p>A job is typically suspended because its leadership has been revoked; it can be restarted by
* calling the {@link #start(JobMasterId)} method once this JobMaster regains the leadership.
*
* <p>This method is executed asynchronously.
*
* @param cause The reason why this job was suspended.
* @return Future acknowledge indicating that the job has been suspended. Otherwise the future contains an exception
*/
public CompletableFuture<Acknowledge> suspend(final Exception cause) {
CompletableFuture<Acknowledge> suspendFuture = callAsyncWithoutFencing(
() -> suspendExecution(cause),
RpcUtils.INF_TIMEOUT);
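// Stop the RPC endpoint once the suspension attempt has completed, regardless of its outcome.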
return suspendFuture.whenComplete((acknowledge, throwable) -> stop());
}
/**
* Suspend the job and shut down all other services, including the rpc service.
*/
@Override
public CompletableFuture<Void> onStop() {
log.info("Stopping the JobMaster for job {}({}).", jobGraph.getName(), jobGraph.getJobID());
// disconnect from all registered TaskExecutors
final Set<ResourceID> taskManagerResourceIds = new HashSet<>(registeredTaskManagers.keySet());
final FlinkException cause = new FlinkException("Stopping JobMaster for job " + jobGraph.getName() +
'(' + jobGraph.getJobID() + ").");
for (ResourceID taskManagerResourceId : taskManagerResourceIds) {
disconnectTaskManager(taskManagerResourceId, cause);
}
// make sure there is a graceful exit
suspendExecution(new FlinkException("JobManager is shutting down."));
// shut down will internally release all registered slots
slotPool.close();
return CompletableFuture.completedFuture(null);
}
//----------------------------------------------------------------------------------------------
// RPC methods
//----------------------------------------------------------------------------------------------
@Override
public CompletableFuture<Acknowledge> cancel(Time timeout) {
schedulerNG.cancel();
return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
* Updates the task execution state for a given task.
*
* @param taskExecutionState New task execution state for a given task
* @return Acknowledge the task execution state update
*/
@Override
public CompletableFuture<Acknowledge> updateTaskExecutionState(
final TaskExecutionState taskExecutionState) {
checkNotNull(taskExecutionState, "taskExecutionState");
if (schedulerNG.updateTaskExecutionState(taskExecutionState)) {
return CompletableFuture.completedFuture(Acknowledge.get());
} else {
return FutureUtils.completedExceptionally(
new ExecutionGraphException("The execution attempt " +
taskExecutionState.getID() + " was not found."));
}
}
@Override
public CompletableFuture<SerializedInputSplit> requestNextInputSplit(
final JobVertexID vertexID,
final ExecutionAttemptID executionAttempt) {
try {
return CompletableFuture.completedFuture(schedulerNG.requestNextInputSplit(vertexID, executionAttempt));
} catch (IOException e) {
log.warn("Error while requesting next input split", e);
return FutureUtils.completedExceptionally(e);
}
}
@Override
public CompletableFuture<ExecutionState> requestPartitionState(
final IntermediateDataSetID intermediateResultId,
final ResultPartitionID resultPartitionId) {
try {
return CompletableFuture.completedFuture(schedulerNG.requestPartitionState(intermediateResultId, resultPartitionId));
} catch (PartitionProducerDisposedException e) {
log.info("Error while requesting partition state", e);
return FutureUtils.completedExceptionally(e);
}
}
@Override
public CompletableFuture<Acknowledge> scheduleOrUpdateConsumers(
final ResultPartitionID partitionID,
final Time timeout) {
schedulerNG.scheduleOrUpdateConsumers(partitionID);
return CompletableFuture.completedFuture(Acknowledge.get());
}
@Override
public CompletableFuture<Acknowledge> disconnectTaskManager(final ResourceID resourceID, final Exception cause) {
log.debug("Disconnect TaskExecutor {} because: {}", resourceID, cause.getMessage());
taskManagerHeartbeatManager.unmonitorTarget(resourceID);
slotPool.releaseTaskManager(resourceID, cause);
partitionTracker.stopTrackingPartitionsFor(resourceID);
Tuple2<TaskManagerLocation, TaskExecutorGateway> taskManagerConnection = registeredTaskManagers.remove(resourceID);
if (taskManagerConnection != null) {
taskManagerConnection.f1.disconnectJobManager(jobGraph.getJobID(), cause);
}
return CompletableFuture.completedFuture(Acknowledge.get());
}
// TODO: This method needs a leader session ID
@Override
public void acknowledgeCheckpoint(
final JobID jobID,
final ExecutionAttemptID executionAttemptID,
final long checkpointId,
final CheckpointMetrics checkpointMetrics,
final TaskStateSnapshot checkpointState) {
schedulerNG.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, checkpointState);
}
// TODO: This method needs a leader session ID
@Override
public void declineCheckpoint(DeclineCheckpoint decline) {
schedulerNG.declineCheckpoint(decline);
}
@Override
public CompletableFuture<Acknowledge> sendOperatorEventToCoordinator(
final ExecutionAttemptID task,
final OperatorID operatorID,
final SerializedValue<OperatorEvent> serializedEvent) {
try {
final OperatorEvent evt = serializedEvent.deserializeValue(userCodeLoader);
schedulerNG.deliverOperatorEventToCoordinator(task, operatorID, evt);
return CompletableFuture.completedFuture(Acknowledge.get());
} catch (Exception e) {
return FutureUtils.completedExceptionally(e);
}
}
@Override
public CompletableFuture<KvStateLocation> requestKvStateLocation(final JobID jobId, final String registrationName) {
try {
return CompletableFuture.completedFuture(schedulerNG.requestKvStateLocation(jobId, registrationName));
} catch (UnknownKvStateLocation | FlinkJobNotFoundException e) {
log.info("Error while request key-value state location", e);
return FutureUtils.completedExceptionally(e);
}
}
@Override
public CompletableFuture<Acknowledge> notifyKvStateRegistered(
final JobID jobId,
final JobVertexID jobVertexId,
final KeyGroupRange keyGroupRange,
final String registrationName,
final KvStateID kvStateId,
final InetSocketAddress kvStateServerAddress) {
try {
schedulerNG.notifyKvStateRegistered(jobId, jobVertexId, keyGroupRange, registrationName, kvStateId, kvStateServerAddress);
return CompletableFuture.completedFuture(Acknowledge.get());
} catch (FlinkJobNotFoundException e) {
log.info("Error while receiving notification about key-value state registration", e);
return FutureUtils.completedExceptionally(e);
}
}
@Override
public CompletableFuture<Acknowledge> notifyKvStateUnregistered(
JobID jobId,
JobVertexID jobVertexId,
KeyGroupRange keyGroupRange,
String registrationName) {
try {
schedulerNG.notifyKvStateUnregistered(jobId, jobVertexId, keyGroupRange, registrationName);
return CompletableFuture.completedFuture(Acknowledge.get());
} catch (FlinkJobNotFoundException e) {
log.info("Error while receiving notification about key-value state de-registration", e);
return FutureUtils.completedExceptionally(e);
}
}
@Override
public CompletableFuture<Collection<SlotOffer>> offerSlots(
final ResourceID taskManagerId,
final Collection<SlotOffer> slots,
final Time timeout) {
Tuple2<TaskManagerLocation, TaskExecutorGateway> taskManager = registeredTaskManagers.get(taskManagerId);
if (taskManager == null) {
return FutureUtils.completedExceptionally(new Exception("Unknown TaskManager " + taskManagerId));
}
final TaskManagerLocation taskManagerLocation = taskManager.f0;
final TaskExecutorGateway taskExecutorGateway = taskManager.f1;
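// Wrap the raw TaskExecutor gateway so that calls towards the TaskManager carry this
// JobMaster's current fencing token (the JobMasterId).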
final RpcTaskManagerGateway rpcTaskManagerGateway = new RpcTaskManagerGateway(taskExecutorGateway, getFencingToken());
return CompletableFuture.completedFuture(
slotPool.offerSlots(
taskManagerLocation,
rpcTaskManagerGateway,
slots));
}
@Override
public void failSlot(
final ResourceID taskManagerId,
final AllocationID allocationId,
final Exception cause) {
if (registeredTaskManagers.containsKey(taskManagerId)) {
internalFailAllocation(allocationId, cause);
} else {
log.warn("Cannot fail slot " + allocationId + " because the TaskManager " +
taskManagerId + " is unknown.");
}
}
private void internalFailAllocation(AllocationID allocationId, Exception cause) {
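// The slot pool reports the owning TaskManager's ResourceID once it no longer holds any
// allocations for it; if that TaskManager also has no tracked partitions, release it entirely.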
final Optional<ResourceID> resourceIdOptional = slotPool.failAllocation(allocationId, cause);
resourceIdOptional.ifPresent(taskManagerId -> {
if (!partitionTracker.isTrackingPartitionsFor(taskManagerId)) {
releaseEmptyTaskManager(taskManagerId);
}
});
}
private void releaseEmptyTaskManager(ResourceID resourceId) {
disconnectTaskManager(resourceId, new FlinkException(String.format("No more slots registered at JobMaster %s.", resourceId)));
}
@Override
public CompletableFuture<RegistrationResponse> registerTaskManager(
final String taskManagerRpcAddress,
final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation,
final Time timeout) {
final TaskManagerLocation taskManagerLocation;
try {
taskManagerLocation = TaskManagerLocation.fromUnresolvedLocation(unresolvedTaskManagerLocation);
} catch (Throwable throwable) {
final String errMsg = String.format(
"Could not accept TaskManager registration. TaskManager address %s cannot be resolved. %s",
unresolvedTaskManagerLocation.getExternalAddress(),
throwable.getMessage());
log.error(errMsg);
return CompletableFuture.completedFuture(new RegistrationResponse.Decline(errMsg));
}
final ResourceID taskManagerId = taskManagerLocation.getResourceID();
if (registeredTaskManagers.containsKey(taskManagerId)) {
final RegistrationResponse response = new JMTMRegistrationSuccess(resourceId);
return CompletableFuture.completedFuture(response);
} else {
return getRpcService()
.connect(taskManagerRpcAddress, TaskExecutorGateway.class)
.handleAsync(
(TaskExecutorGateway taskExecutorGateway, Throwable throwable) -> {
if (throwable != null) {
return new RegistrationResponse.Decline(throwable.getMessage());
}
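// Connection established: register the TaskManager with the slot pool and remember its
// location and gateway so that it can later offer slots and receive heartbeats.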
slotPool.registerTaskManager(taskManagerId);
registeredTaskManagers.put(taskManagerId, Tuple2.of(taskManagerLocation, taskExecutorGateway));
// monitor the task manager as heartbeat target
taskManagerHeartbeatManager.monitorTarget(taskManagerId, new HeartbeatTarget<AllocatedSlotReport>() {
@Override
public void receiveHeartbeat(ResourceID resourceID, AllocatedSlotReport payload) {
// the task manager will not request heartbeat, so this method will never be called currently
}
@Override
public void requestHeartbeat(ResourceID resourceID, AllocatedSlotReport allocatedSlotReport) {
taskExecutorGateway.heartbeatFromJobManager(resourceID, allocatedSlotReport);
}
});
return new JMTMRegistrationSuccess(resourceId);
},
getMainThreadExecutor());
}
}
@Override
public void disconnectResourceManager(
final ResourceManagerId resourceManagerId,
final Exception cause) {
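// Only react if the request refers to the ResourceManager this JobMaster is currently
// connected (or connecting) to; in that case, drop the connection and start reconnecting.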
if (isConnectingToResourceManager(resourceManagerId)) {
reconnectToResourceManager(cause);
}
}
private boolean isConnectingToResourceManager(ResourceManagerId resourceManagerId) {
return resourceManagerAddress != null
&& resourceManagerAddress.getResourceManagerId().equals(resourceManagerId);
}
@Override
public void heartbeatFromTaskManager(final ResourceID resourceID, AccumulatorReport accumulatorReport) {
taskManagerHeartbeatManager.receiveHeartbeat(resourceID, accumulatorReport);
}
@Override
public void heartbeatFromResourceManager(final ResourceID resourceID) {
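// The heartbeat towards the ResourceManager carries no payload from the JobMaster side, hence null.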
resourceManagerHeartbeatManager.requestHeartbeat(resourceID, null);
}
@Override
public CompletableFuture<JobDetails> requestJobDetails(Time timeout) {
return CompletableFuture.completedFuture(schedulerNG.requestJobDetails());
}
@Override
public CompletableFuture<JobStatus> requestJobStatus(Time timeout) {
return CompletableFuture.completedFuture(schedulerNG.requestJobStatus());
}
@Override
public CompletableFuture<ArchivedExecutionGraph> requestJob(Time timeout) {
return CompletableFuture.completedFuture(schedulerNG.requestJob());
}
@Override
public CompletableFuture<String> triggerSavepoint(
@Nullable final String targetDirectory,
final boolean cancelJob,
final Time timeout) {
return schedulerNG.triggerSavepoint(targetDirectory, cancelJob);
}
@Override
public CompletableFuture<String> stopWithSavepoint(
@Nullable final String targetDirectory,
final boolean advanceToEndOfEventTime,
final Time timeout) {
return schedulerNG.stopWithSavepoint(targetDirectory, advanceToEndOfEventTime);
}
@Override
public CompletableFuture<OperatorBackPressureStatsResponse> requestOperatorBackPressureStats(final JobVertexID jobVertexId) {
try {
final Optional<OperatorBackPressureStats> operatorBackPressureStats = schedulerNG.requestOperatorBackPressureStats(jobVertexId);
return CompletableFuture.completedFuture(OperatorBackPressureStatsResponse.of(
operatorBackPressureStats.orElse(null)));
} catch (FlinkException e) {
log.info("Error while requesting operator back pressure stats", e);
return FutureUtils.completedExceptionally(e);
}
}
@Override
public void notifyAllocationFailure(AllocationID allocationID, Exception cause) {
internalFailAllocation(allocationID, cause);
}
@Override
public CompletableFuture