// org.apache.flink.runtime.resourcemanager.ResourceManager Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.resourcemanager;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.JobStatus;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.blob.TransientBlobKey;
import org.apache.flink.runtime.clusterframework.ApplicationStatus;
import org.apache.flink.runtime.clusterframework.types.AllocationID;
import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.runtime.clusterframework.types.ResourceIDRetrievable;
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
import org.apache.flink.runtime.clusterframework.types.SlotID;
import org.apache.flink.runtime.concurrent.FutureUtils;
import org.apache.flink.runtime.entrypoint.ClusterInformation;
import org.apache.flink.runtime.heartbeat.HeartbeatListener;
import org.apache.flink.runtime.heartbeat.HeartbeatManager;
import org.apache.flink.runtime.heartbeat.HeartbeatServices;
import org.apache.flink.runtime.heartbeat.HeartbeatTarget;
import org.apache.flink.runtime.heartbeat.NoOpHeartbeatManager;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.instance.InstanceID;
import org.apache.flink.runtime.io.network.partition.DataSetMetaInfo;
import org.apache.flink.runtime.io.network.partition.ResourceManagerPartitionTracker;
import org.apache.flink.runtime.io.network.partition.ResourceManagerPartitionTrackerFactory;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobmaster.JobMaster;
import org.apache.flink.runtime.jobmaster.JobMasterGateway;
import org.apache.flink.runtime.jobmaster.JobMasterId;
import org.apache.flink.runtime.jobmaster.JobMasterRegistrationSuccess;
import org.apache.flink.runtime.leaderelection.LeaderContender;
import org.apache.flink.runtime.leaderelection.LeaderElectionService;
import org.apache.flink.runtime.messages.Acknowledge;
import org.apache.flink.runtime.metrics.MetricNames;
import org.apache.flink.runtime.metrics.groups.ResourceManagerMetricGroup;
import org.apache.flink.runtime.registration.RegistrationResponse;
import org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException;
import org.apache.flink.runtime.resourcemanager.exceptions.UnknownTaskExecutorException;
import org.apache.flink.runtime.resourcemanager.registration.JobManagerRegistration;
import org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration;
import org.apache.flink.runtime.resourcemanager.slotmanager.ResourceActions;
import org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager;
import org.apache.flink.runtime.rest.messages.LogInfo;
import org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo;
import org.apache.flink.runtime.rest.messages.taskmanager.ThreadDumpInfo;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.FencedRpcEndpoint;
import org.apache.flink.runtime.rpc.RpcService;
import org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils;
import org.apache.flink.runtime.slots.ResourceRequirement;
import org.apache.flink.runtime.slots.ResourceRequirements;
import org.apache.flink.runtime.taskexecutor.FileType;
import org.apache.flink.runtime.taskexecutor.SlotReport;
import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
import org.apache.flink.runtime.taskexecutor.TaskExecutorHeartbeatPayload;
import org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationRejection;
import org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess;
import org.apache.flink.runtime.taskexecutor.TaskExecutorThreadInfoGateway;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkException;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* ResourceManager implementation. The resource manager is responsible for resource de-/allocation
* and bookkeeping.
*
* <p>It offers the following methods as part of its rpc interface to interact with it remotely:
*
* <ul>
*   <li>{@link #registerJobManager(JobMasterId, ResourceID, String, JobID, Time)} registers a
*       {@link JobMaster} at the resource manager
*   <li>{@link #requestSlot(JobMasterId, SlotRequest, Time)} requests a slot from the resource
*       manager
* </ul>
*/
public abstract class ResourceManager
extends FencedRpcEndpoint
implements ResourceManagerGateway, LeaderContender {
public static final String RESOURCE_MANAGER_NAME = "resourcemanager";
/** Unique id of the resource manager. */
private final ResourceID resourceId;
/** All currently registered JobMasterGateways scoped by JobID. */
private final Map jobManagerRegistrations;
/** All currently registered JobMasterGateways scoped by ResourceID. */
private final Map jmResourceIdRegistrations;
/** Service to retrieve the job leader ids. */
private final JobLeaderIdService jobLeaderIdService;
/** All currently registered TaskExecutors with there framework specific worker information. */
private final Map> taskExecutors;
/** Ongoing registration of TaskExecutors per resource ID. */
private final Map>
taskExecutorGatewayFutures;
/** High availability services for leader retrieval and election. */
private final HighAvailabilityServices highAvailabilityServices;
private final HeartbeatServices heartbeatServices;
/** Fatal error handler. */
private final FatalErrorHandler fatalErrorHandler;
/** The slot manager maintains the available slots. */
private final SlotManager slotManager;
private final ResourceManagerPartitionTracker clusterPartitionTracker;
private final ClusterInformation clusterInformation;
protected final ResourceManagerMetricGroup resourceManagerMetricGroup;
protected final Executor ioExecutor;
/** The service to elect a ResourceManager leader. */
private LeaderElectionService leaderElectionService;
/** The heartbeat manager with task managers. */
private HeartbeatManager taskManagerHeartbeatManager;
/** The heartbeat manager with job managers. */
private HeartbeatManager jobManagerHeartbeatManager;
private boolean hasLeadership = false;
/**
* Represents asynchronous state clearing work.
*
* @see #clearStateAsync()
* @see #clearStateInternal()
*/
private CompletableFuture clearStateFuture = CompletableFuture.completedFuture(null);
/**
 * Creates the resource manager endpoint and wires up its collaborators.
 *
 * <p>Every collaborator except {@code ioExecutor} is null-checked. Both heartbeat managers start
 * out as no-op instances. The cluster partition tracker is created from the given factory with a
 * callback that asks the affected TaskExecutor to release the cluster partitions of the given
 * data sets.
 *
 * @param rpcService rpc service this endpoint runs on
 * @param resourceId unique id of the resource manager
 * @param highAvailabilityServices services used for leader election
 * @param heartbeatServices heartbeat services
 * @param slotManager manager of the available slots
 * @param clusterPartitionTrackerFactory factory for the cluster partition tracker
 * @param jobLeaderIdService service to retrieve the job leader ids
 * @param clusterInformation information about the cluster
 * @param fatalErrorHandler handler for fatal errors
 * @param resourceManagerMetricGroup metric group of the resource manager
 * @param rpcTimeout timeout of the partition release calls sent to TaskExecutors
 * @param ioExecutor executor for work that should not run in the main thread
 */
public ResourceManager(
        RpcService rpcService,
        ResourceID resourceId,
        HighAvailabilityServices highAvailabilityServices,
        HeartbeatServices heartbeatServices,
        SlotManager slotManager,
        ResourceManagerPartitionTrackerFactory clusterPartitionTrackerFactory,
        JobLeaderIdService jobLeaderIdService,
        ClusterInformation clusterInformation,
        FatalErrorHandler fatalErrorHandler,
        ResourceManagerMetricGroup resourceManagerMetricGroup,
        Time rpcTimeout,
        Executor ioExecutor) {
    super(rpcService, AkkaRpcServiceUtils.createRandomName(RESOURCE_MANAGER_NAME), null);

    // Bookkeeping structures. taskExecutors has to be assigned before the release callback
    // below is created, because the callback reads this final field.
    this.jobManagerRegistrations = new HashMap<>(4);
    this.jmResourceIdRegistrations = new HashMap<>(4);
    this.taskExecutors = new HashMap<>(8);
    this.taskExecutorGatewayFutures = new HashMap<>(8);

    // Heartbeating is disabled until real managers are installed.
    this.jobManagerHeartbeatManager = NoOpHeartbeatManager.getInstance();
    this.taskManagerHeartbeatManager = NoOpHeartbeatManager.getInstance();

    // Mandatory collaborators.
    this.resourceId = checkNotNull(resourceId);
    this.highAvailabilityServices = checkNotNull(highAvailabilityServices);
    this.heartbeatServices = checkNotNull(heartbeatServices);
    this.slotManager = checkNotNull(slotManager);
    this.jobLeaderIdService = checkNotNull(jobLeaderIdService);
    this.clusterInformation = checkNotNull(clusterInformation);
    this.fatalErrorHandler = checkNotNull(fatalErrorHandler);
    this.resourceManagerMetricGroup = checkNotNull(resourceManagerMetricGroup);
    this.ioExecutor = ioExecutor;

    this.clusterPartitionTracker =
            checkNotNull(clusterPartitionTrackerFactory)
                    .get(
                            (tmResourceId, dataSetsToRelease) ->
                                    taskExecutors
                                            .get(tmResourceId)
                                            .getTaskExecutorGateway()
                                            .releaseClusterPartitions(
                                                    dataSetsToRelease, rpcTimeout)
                                            .exceptionally(
                                                    releaseFailure -> {
                                                        log.debug(
                                                                "Request for release of cluster partitions belonging to data sets {} was not successful.",
                                                                dataSetsToRelease,
                                                                releaseFailure);
                                                        // keep the returned future failed
                                                        throw new CompletionException(
                                                                releaseFailure);
                                                    }));
}
// ------------------------------------------------------------------------
// RPC lifecycle methods
// ------------------------------------------------------------------------
/**
 * Starts the resource manager services when the RPC endpoint is started.
 *
 * <p>Any failure is wrapped into a {@link ResourceManagerException}, reported through
 * {@code onFatalError} and then rethrown.
 *
 * @throws Exception the wrapping {@link ResourceManagerException} if the start-up failed
 */
@Override
public final void onStart() throws Exception {
    try {
        log.info("Starting the resource manager.");
        startResourceManagerServices();
    } catch (Throwable startupFailure) {
        final String message =
                String.format("Could not start the ResourceManager %s", getAddress());
        final ResourceManagerException wrappedFailure =
                new ResourceManagerException(message, startupFailure);
        onFatalError(wrappedFailure);
        throw wrappedFailure;
    }
}
/**
* Starts the services of the resource manager in a fixed order: obtains the leader election
* service, calls {@code initialize()}, starts the leader election with this instance as the
* contender, starts the job leader id service and registers the metrics.
*
* <p>If any step throws an {@link Exception}, the already started services are stopped again
* and the exception is rethrown by {@link #handleStartResourceManagerServicesException}.
*
* @throws Exception if one of the services could not be started
*/
private void startResourceManagerServices() throws Exception {
try {
leaderElectionService =
highAvailabilityServices.getResourceManagerLeaderElectionService();
// NOTE(review): initialize() is defined outside this view; presumably the
// framework-specific start-up hook, which runs before leadership can be granted.
initialize();
leaderElectionService.start(this);
jobLeaderIdService.start(new JobLeaderIdActionsImpl());
registerMetrics();
} catch (Exception e) {
// stops the services again and rethrows e
handleStartResourceManagerServicesException(e);
}
}
/**
 * Cleans up after a failed start: stops the resource manager services and rethrows the
 * start-up failure. A failure of the cleanup itself is attached to the start-up failure as a
 * suppressed exception.
 *
 * @param e the exception that made the start-up fail
 * @throws Exception always; the given exception {@code e}
 */
private void handleStartResourceManagerServicesException(Exception e) throws Exception {
    try {
        stopResourceManagerServices();
    } catch (Exception cleanupFailure) {
        // do not let the cleanup problem hide the original cause
        e.addSuppressed(cleanupFailure);
    }

    throw e;
}
/**
 * Stops the resource manager services when the RPC endpoint is stopped.
 *
 * @return a completed future if the shut down succeeded, otherwise a future completed
 *     exceptionally with a {@link FlinkException} wrapping the failure
 */
@Override
public final CompletableFuture onStop() {
    try {
        stopResourceManagerServices();
        return CompletableFuture.completedFuture(null);
    } catch (Exception shutdownFailure) {
        final FlinkException wrappedFailure =
                new FlinkException(
                        "Could not properly shut down the ResourceManager.", shutdownFailure);
        return FutureUtils.completedExceptionally(wrappedFailure);
    }
}
/**
 * Stops all services of the resource manager.
 *
 * <p>The individual services are stopped independently of each other: a failure of one step
 * is remembered and the remaining steps still run. The first failure is rethrown at the end,
 * with later failures attached as suppressed exceptions.
 *
 * @throws Exception the first failure that occurred while stopping the services
 */
private void stopResourceManagerServices() throws Exception {
    Exception exception = null;

    try {
        terminate();
    } catch (Exception e) {
        exception =
                new ResourceManagerException("Error while shutting down resource manager", e);
    }

    stopHeartbeatServices();

    try {
        slotManager.close();
    } catch (Exception e) {
        exception = ExceptionUtils.firstOrSuppressed(e, exception);
    }

    // This method also runs as cleanup of a failed start. If the start already failed while
    // obtaining the leader election service, the field is still null; skipping it avoids a
    // spurious NullPointerException being attached to the real failure.
    if (leaderElectionService != null) {
        try {
            leaderElectionService.stop();
        } catch (Exception e) {
            exception = ExceptionUtils.firstOrSuppressed(e, exception);
        }
    }

    try {
        jobLeaderIdService.stop();
    } catch (Exception e) {
        exception = ExceptionUtils.firstOrSuppressed(e, exception);
    }

    resourceManagerMetricGroup.close();

    clearStateInternal();

    ExceptionUtils.tryRethrowException(exception);
}
// ------------------------------------------------------------------------
// RPC methods
// ------------------------------------------------------------------------
/**
 * Registers a JobMaster for the given job at the resource manager.
 *
 * <p>The job is added to the job leader id service if it is not known yet. The registration
 * is only carried out if the id of the registering JobMaster equals the leader id reported by
 * the job leader id service; otherwise a failure response is returned.
 *
 * @param jobMasterId fencing id of the registering JobMaster
 * @param jobManagerResourceId resource id of the registering JobMaster
 * @param jobManagerAddress rpc address of the registering JobMaster
 * @param jobId id of the job the JobMaster is responsible for
 * @param timeout timeout of the call (not used by this implementation)
 * @return future with the registration response; completed exceptionally if the job could not
 *     be added to the job leader id service or its leader id could not be requested
 */
@Override
public CompletableFuture registerJobManager(
        final JobMasterId jobMasterId,
        final ResourceID jobManagerResourceId,
        final String jobManagerAddress,
        final JobID jobId,
        final Time timeout) {
    checkNotNull(jobMasterId);
    checkNotNull(jobManagerResourceId);
    checkNotNull(jobManagerAddress);
    checkNotNull(jobId);

    if (!jobLeaderIdService.containsJob(jobId)) {
        try {
            jobLeaderIdService.addJob(jobId);
        } catch (Exception e) {
            ResourceManagerException exception =
                    new ResourceManagerException(
                            "Could not add the job " + jobId + " to the job id leader service.",
                            e);
            onFatalError(exception);
            log.error("Could not add job {} to job leader id service.", jobId, e);
            return FutureUtils.completedExceptionally(exception);
        }
    }

    log.info(
            "Registering job manager {}@{} for job {}.", jobMasterId, jobManagerAddress, jobId);

    CompletableFuture jobMasterIdFuture;
    try {
        jobMasterIdFuture = jobLeaderIdService.getLeaderId(jobId);
    } catch (Exception e) {
        // we cannot check the job leader id so let's fail
        // TODO: Maybe it's also ok to skip this check in case that we cannot check the leader
        // id
        ResourceManagerException exception =
                new ResourceManagerException(
                        "Cannot obtain the "
                                + "job leader id future to verify the correct job leader.",
                        e);
        onFatalError(exception);
        // pass the cause as trailing argument so that the log entry carries the stack trace
        log.debug(
                "Could not obtain the job leader id future to verify the correct job leader.",
                e);
        return FutureUtils.completedExceptionally(exception);
    }

    CompletableFuture jobMasterGatewayFuture =
            getRpcService().connect(jobManagerAddress, jobMasterId, JobMasterGateway.class);

    // Register in the main thread once both the gateway and the current leader id are known.
    CompletableFuture registrationResponseFuture =
            jobMasterGatewayFuture.thenCombineAsync(
                    jobMasterIdFuture,
                    (JobMasterGateway jobMasterGateway, JobMasterId leadingJobMasterId) -> {
                        if (Objects.equals(leadingJobMasterId, jobMasterId)) {
                            return registerJobMasterInternal(
                                    jobMasterGateway,
                                    jobId,
                                    jobManagerAddress,
                                    jobManagerResourceId);
                        } else {
                            final String declineMessage =
                                    String.format(
                                            "The leading JobMaster id %s did not match the received JobMaster id %s. "
                                                    + "This indicates that a JobMaster leader change has happened.",
                                            leadingJobMasterId, jobMasterId);
                            log.debug(declineMessage);
                            return new RegistrationResponse.Failure(
                                    new FlinkException(declineMessage));
                        }
                    },
                    getMainThreadExecutor());

    // handle exceptions which might have occurred in one of the futures inputs of combine
    return registrationResponseFuture.handleAsync(
            (RegistrationResponse registrationResponse, Throwable throwable) -> {
                if (throwable != null) {
                    // the stack trace is only logged on debug level
                    if (log.isDebugEnabled()) {
                        log.debug(
                                "Registration of job manager {}@{} failed.",
                                jobMasterId,
                                jobManagerAddress,
                                throwable);
                    } else {
                        log.info(
                                "Registration of job manager {}@{} failed.",
                                jobMasterId,
                                jobManagerAddress);
                    }
                    return new RegistrationResponse.Failure(throwable);
                } else {
                    return registrationResponse;
                }
            },
            ioExecutor);
}
/**
 * Registers a TaskExecutor at the resource manager.
 *
 * <p>The connection attempt to the TaskExecutor is remembered per resource id. When the
 * connection completes, the registration is only carried out if this attempt is still the one
 * remembered for that resource id; otherwise it is declined as outdated.
 *
 * @param taskExecutorRegistration registration information of the TaskExecutor
 * @param timeout timeout of the call (not used by this implementation)
 * @return future with the registration response
 */
@Override
public CompletableFuture registerTaskExecutor(
        final TaskExecutorRegistration taskExecutorRegistration, final Time timeout) {
    CompletableFuture taskExecutorGatewayFuture =
            getRpcService()
                    .connect(
                            taskExecutorRegistration.getTaskExecutorAddress(),
                            TaskExecutorGateway.class);

    taskExecutorGatewayFutures.put(
            taskExecutorRegistration.getResourceId(), taskExecutorGatewayFuture);

    return taskExecutorGatewayFuture.handleAsync(
            (TaskExecutorGateway gateway, Throwable connectFailure) -> {
                final ResourceID registeringId = taskExecutorRegistration.getResourceId();

                // A newer registration attempt replaced this one in the meantime.
                if (taskExecutorGatewayFuture
                        != taskExecutorGatewayFutures.get(registeringId)) {
                    log.debug(
                            "Ignoring outdated TaskExecutorGateway connection for {}.",
                            registeringId.getStringWithMetadata());
                    return new RegistrationResponse.Failure(
                            new FlinkException("Decline outdated task executor registration."));
                }

                taskExecutorGatewayFutures.remove(registeringId);

                if (connectFailure != null) {
                    return new RegistrationResponse.Failure(connectFailure);
                }
                return registerTaskExecutorInternal(gateway, taskExecutorRegistration);
            },
            getMainThreadExecutor());
}
/**
 * Processes the slot report of a TaskExecutor and registers the TaskExecutor at the slot
 * manager.
 *
 * <p>The report is rejected if no TaskExecutor is registered under the given resource id, or
 * if the registered instance id differs from the given registration id.
 *
 * @param taskManagerResourceId resource id of the reporting TaskExecutor
 * @param taskManagerRegistrationId instance id the TaskExecutor was registered with
 * @param slotReport slot report of the TaskExecutor
 * @param timeout timeout of the call (not used by this implementation)
 * @return future with an acknowledge, or a future completed exceptionally with a {@link
 *     ResourceManagerException} if the report was rejected
 */
@Override
public CompletableFuture sendSlotReport(
        ResourceID taskManagerResourceId,
        InstanceID taskManagerRegistrationId,
        SlotReport slotReport,
        Time timeout) {
    final WorkerRegistration workerTypeWorkerRegistration =
            taskExecutors.get(taskManagerResourceId);

    // An unknown TaskExecutor has no registration at all; reject it like a stale registration
    // id instead of failing with a NullPointerException.
    if (workerTypeWorkerRegistration == null
            || !workerTypeWorkerRegistration
                    .getInstanceID()
                    .equals(taskManagerRegistrationId)) {
        return FutureUtils.completedExceptionally(
                new ResourceManagerException(
                        String.format(
                                "Unknown TaskManager registration id %s.",
                                taskManagerRegistrationId)));
    }

    // notify the subclass only if the slot manager accepted the TaskExecutor
    if (slotManager.registerTaskManager(
            workerTypeWorkerRegistration,
            slotReport,
            workerTypeWorkerRegistration.getTotalResourceProfile(),
            workerTypeWorkerRegistration.getDefaultSlotResourceProfile())) {
        onWorkerRegistered(workerTypeWorkerRegistration.getWorker());
    }
    return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
* Hook that is called from {@code sendSlotReport} after the slot manager reported a successful
* registration of the TaskExecutor. The default implementation does nothing.
*
* @param worker the worker that has been registered
*/
protected void onWorkerRegistered(WorkerType worker) {
// noop
}
/**
* Forwards a heartbeat received from a TaskExecutor, together with its payload, to the
* TaskManager heartbeat manager.
*
* @param resourceID resource id of the TaskExecutor that sent the heartbeat
* @param heartbeatPayload payload that came with the heartbeat
*/
@Override
public void heartbeatFromTaskManager(
final ResourceID resourceID, final TaskExecutorHeartbeatPayload heartbeatPayload) {
taskManagerHeartbeatManager.receiveHeartbeat(resourceID, heartbeatPayload);
}
/**
* Forwards a heartbeat received from a JobManager to the JobManager heartbeat manager. No
* payload is passed along ({@code null}).
*
* @param resourceID resource id of the JobManager that sent the heartbeat
*/
@Override
public void heartbeatFromJobManager(final ResourceID resourceID) {
jobManagerHeartbeatManager.receiveHeartbeat(resourceID, null);
}
/**
* Closes the connection to the given TaskExecutor and, if closing the connection yields a
* worker, hands that worker to {@code stopWorker}.
*
* @param resourceId resource id of the TaskExecutor to disconnect
* @param cause reason for the disconnect
*/
@Override
public void disconnectTaskManager(final ResourceID resourceId, final Exception cause) {
closeTaskManagerConnection(resourceId, cause).ifPresent((ResourceManager.this::stopWorker));
}
/**
 * Disconnects the JobManager of the given job.
 *
 * <p>If the job reached a globally terminal state the job is removed; otherwise only the
 * connection is closed and the resource requirements of the job are retained.
 *
 * @param jobId id of the job whose JobManager disconnects
 * @param jobStatus status of the job at the time of the disconnect
 * @param cause reason for the disconnect
 */
@Override
public void disconnectJobManager(
        final JobID jobId, JobStatus jobStatus, final Exception cause) {
    if (!jobStatus.isGloballyTerminalState()) {
        // the job may come back, so keep its requirements
        closeJobManagerConnection(jobId, ResourceRequirementHandling.RETAIN, cause);
        return;
    }

    removeJob(jobId, cause);
}
/**
 * Requests a slot from the resource manager on behalf of a registered JobMaster.
 *
 * @param jobMasterId fencing id of the requesting JobMaster
 * @param slotRequest the slot request
 * @param timeout timeout of the call (not used by this implementation)
 * @return future with an acknowledge if the request was registered at the slot manager;
 *     completed exceptionally with a {@link ResourceManagerException} if no JobManager is
 *     registered for the job, the JobMaster id does not match the registered one, or the slot
 *     manager rejected the request
 */
@Override
public CompletableFuture requestSlot(
        JobMasterId jobMasterId, SlotRequest slotRequest, final Time timeout) {
    JobID jobId = slotRequest.getJobId();
    JobManagerRegistration registration = jobManagerRegistrations.get(jobId);

    if (registration == null) {
        return FutureUtils.completedExceptionally(
                new ResourceManagerException(
                        "Could not find registered job manager for job " + jobId + '.'));
    }

    if (!Objects.equals(jobMasterId, registration.getJobMasterId())) {
        return FutureUtils.completedExceptionally(
                new ResourceManagerException(
                        "The job leader's id "
                                + registration.getJobMasterId()
                                + " does not match the received id "
                                + jobMasterId
                                + '.'));
    }

    log.info(
            "Request slot with profile {} for job {} with allocation id {}.",
            slotRequest.getResourceProfile(),
            slotRequest.getJobId(),
            slotRequest.getAllocationId());

    try {
        slotManager.registerSlotRequest(slotRequest);
    } catch (ResourceManagerException rejected) {
        return FutureUtils.completedExceptionally(rejected);
    }

    return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
 * Declares the resources a job requires and passes them on to the slot manager.
 *
 * @param jobMasterId fencing id of the declaring JobMaster
 * @param resourceRequirements resource requirements of the job
 * @param timeout timeout of the call (not used by this implementation)
 * @return future with an acknowledge; completed exceptionally with a {@link
 *     ResourceManagerException} if no JobManager is registered for the job or the JobMaster id
 *     does not match the registered one
 */
@Override
public CompletableFuture declareRequiredResources(
        JobMasterId jobMasterId, ResourceRequirements resourceRequirements, Time timeout) {
    final JobID jobId = resourceRequirements.getJobId();
    final JobManagerRegistration registration = jobManagerRegistrations.get(jobId);

    if (registration == null) {
        return FutureUtils.completedExceptionally(
                new ResourceManagerException(
                        "Could not find registered job manager for job " + jobId + '.'));
    }

    if (!Objects.equals(jobMasterId, registration.getJobMasterId())) {
        return FutureUtils.completedExceptionally(
                new ResourceManagerException(
                        "The job leader's id "
                                + registration.getJobMasterId()
                                + " does not match the received id "
                                + jobMasterId
                                + '.'));
    }

    slotManager.processResourceRequirements(resourceRequirements);
    return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
* Cancels the slot request with the given allocation id by unregistering it from the slot
* manager.
*
* @param allocationID allocation id of the slot request to cancel
*/
@Override
public void cancelSlotRequest(AllocationID allocationID) {
// Slot allocation is asynchronous, so cancelling cannot prevent every redundant slot; it
// is done on a best-effort basis.
slotManager.unregisterSlotRequest(allocationID);
}
/**
 * Notification from a TaskExecutor that a slot has become available again.
 *
 * <p>The slot is only freed in the slot manager if the TaskExecutor owning the slot is
 * registered and its instance id equals the given one; otherwise the message is discarded.
 *
 * @param instanceID instance id of the TaskExecutor that sent the notification
 * @param slotId id of the slot that became available
 * @param allocationId allocation id the slot was allocated under
 */
@Override
public void notifySlotAvailable(
        final InstanceID instanceID, final SlotID slotId, final AllocationID allocationId) {
    final ResourceID resourceId = slotId.getResourceID();
    WorkerRegistration registration = taskExecutors.get(resourceId);

    if (registration == null) {
        // fixed: the two message parts used to be concatenated without a separating space
        log.debug(
                "Could not find registration for resource id {}. Discarding the slot available"
                        + " message {}.",
                resourceId.getStringWithMetadata(),
                slotId);
        return;
    }

    InstanceID registrationId = registration.getInstanceID();
    if (Objects.equals(registrationId, instanceID)) {
        slotManager.freeSlot(slotId, allocationId);
    } else {
        log.debug(
                "Invalid registration id for slot available message. This indicates an"
                        + " outdated request.");
    }
}
/**
* Cleanup application and shut down cluster.
*
* <p>A {@link ResourceManagerException} raised by the internal deregistration is only logged
* as a warning; the returned future is completed with an acknowledge in either case.
*
* @param finalStatus of the Flink application
* @param diagnostics diagnostics message for the Flink application or {@code null}
* @return future that is already completed with an acknowledge
*/
@Override
public CompletableFuture deregisterApplication(
final ApplicationStatus finalStatus, @Nullable final String diagnostics) {
log.info(
"Shut down cluster because application is in {}, diagnostics {}.",
finalStatus,
diagnostics);
try {
internalDeregisterApplication(finalStatus, diagnostics);
} catch (ResourceManagerException e) {
// a failed deregistration does not fail the call
log.warn("Could not properly shutdown the application.", e);
}
return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
* Returns the number of currently registered TaskExecutors.
*
* @return future that is already completed with the size of the TaskExecutor registry
*/
@Override
public CompletableFuture getNumberOfRegisteredTaskManagers() {
return CompletableFuture.completedFuture(taskExecutors.size());
}
@Override
public CompletableFuture> requestTaskManagerInfo(Time timeout) {
final ArrayList taskManagerInfos = new ArrayList<>(taskExecutors.size());
for (Map.Entry> taskExecutorEntry :
taskExecutors.entrySet()) {
final ResourceID resourceId = taskExecutorEntry.getKey();
final WorkerRegistration taskExecutor = taskExecutorEntry.getValue();
taskManagerInfos.add(
new TaskManagerInfo(
resourceId,
taskExecutor.getTaskExecutorGateway().getAddress(),
taskExecutor.getDataPort(),
taskExecutor.getJmxPort(),
taskManagerHeartbeatManager.getLastHeartbeatFrom(resourceId),
slotManager.getNumberRegisteredSlotsOf(taskExecutor.getInstanceID()),
slotManager.getNumberFreeSlotsOf(taskExecutor.getInstanceID()),
slotManager.getRegisteredResourceOf(taskExecutor.getInstanceID()),
slotManager.getFreeResourceOf(taskExecutor.getInstanceID()),
taskExecutor.getHardwareDescription(),
taskExecutor.getMemoryConfiguration()));
}
return CompletableFuture.completedFuture(taskManagerInfos);
}
/**
 * Returns the detailed information, including the allocated slots, of one TaskExecutor.
 *
 * @param resourceId resource id of the TaskExecutor
 * @param timeout timeout of the call (not used by this implementation)
 * @return future that is already completed with the details, or completed exceptionally with
 *     an {@link UnknownTaskExecutorException} if no such TaskExecutor is registered
 */
@Override
public CompletableFuture requestTaskManagerDetailsInfo(
        ResourceID resourceId, Time timeout) {
    final WorkerRegistration registration = taskExecutors.get(resourceId);

    if (registration == null) {
        return FutureUtils.completedExceptionally(new UnknownTaskExecutorException(resourceId));
    }

    final InstanceID instanceId = registration.getInstanceID();

    final TaskManagerInfo generalInfo =
            new TaskManagerInfo(
                    resourceId,
                    registration.getTaskExecutorGateway().getAddress(),
                    registration.getDataPort(),
                    registration.getJmxPort(),
                    taskManagerHeartbeatManager.getLastHeartbeatFrom(resourceId),
                    slotManager.getNumberRegisteredSlotsOf(instanceId),
                    slotManager.getNumberFreeSlotsOf(instanceId),
                    slotManager.getRegisteredResourceOf(instanceId),
                    slotManager.getFreeResourceOf(instanceId),
                    registration.getHardwareDescription(),
                    registration.getMemoryConfiguration());

    final TaskManagerInfoWithSlots infoWithSlots =
            new TaskManagerInfoWithSlots(
                    generalInfo, slotManager.getAllocatedSlotsOf(instanceId));

    return CompletableFuture.completedFuture(infoWithSlots);
}
/**
 * Returns an overview of the resources known to the resource manager: the number of registered
 * TaskExecutors plus the slot counts and resource profiles reported by the slot manager.
 *
 * @param timeout timeout of the call (not used by this implementation)
 * @return future that is already completed with the resource overview
 */
@Override
public CompletableFuture requestResourceOverview(Time timeout) {
    final int registeredSlots = slotManager.getNumberRegisteredSlots();
    final int freeSlots = slotManager.getNumberFreeSlots();
    final ResourceProfile registeredResource = slotManager.getRegisteredResource();
    final ResourceProfile availableResource = slotManager.getFreeResource();

    final ResourceOverview overview =
            new ResourceOverview(
                    taskExecutors.size(),
                    registeredSlots,
                    freeSlots,
                    registeredResource,
                    availableResource);

    return CompletableFuture.completedFuture(overview);
}
@Override
public CompletableFuture>>
requestTaskManagerMetricQueryServiceAddresses(Time timeout) {
final ArrayList>>>
metricQueryServiceAddressFutures = new ArrayList<>(taskExecutors.size());
for (Map.Entry> workerRegistrationEntry :
taskExecutors.entrySet()) {
final ResourceID tmResourceId = workerRegistrationEntry.getKey();
final WorkerRegistration workerRegistration =
workerRegistrationEntry.getValue();
final TaskExecutorGateway taskExecutorGateway =
workerRegistration.getTaskExecutorGateway();
final CompletableFuture>>
metricQueryServiceAddressFuture =
taskExecutorGateway
.requestMetricQueryServiceAddress(timeout)
.thenApply(
o ->
o.toOptional()
.map(
address ->
Tuple2.of(
tmResourceId,
address)));
metricQueryServiceAddressFutures.add(metricQueryServiceAddressFuture);
}
return FutureUtils.combineAll(metricQueryServiceAddressFutures)
.thenApply(
collection ->
collection.stream()
.filter(Optional::isPresent)
.map(Optional::get)
.collect(Collectors.toList()));
}
/**
 * Asks a TaskExecutor to upload the file of the given type.
 *
 * @param taskManagerId resource id of the TaskExecutor
 * @param fileType type of the file to upload
 * @param timeout timeout of the request sent to the TaskExecutor
 * @return the future returned by the TaskExecutor, or a future completed exceptionally with
 *     an {@link UnknownTaskExecutorException} if no such TaskExecutor is registered
 */
@Override
public CompletableFuture requestTaskManagerFileUploadByType(
        ResourceID taskManagerId, FileType fileType, Time timeout) {
    log.debug(
            "Request {} file upload from TaskExecutor {}.",
            fileType,
            taskManagerId.getStringWithMetadata());

    final WorkerRegistration registration = taskExecutors.get(taskManagerId);

    if (registration != null) {
        return registration.getTaskExecutorGateway().requestFileUploadByType(fileType, timeout);
    }

    log.debug(
            "Request upload of file {} from unregistered TaskExecutor {}.",
            fileType,
            taskManagerId.getStringWithMetadata());
    return FutureUtils.completedExceptionally(
            new UnknownTaskExecutorException(taskManagerId));
}
/**
 * Asks a TaskExecutor to upload the file with the given name.
 *
 * @param taskManagerId resource id of the TaskExecutor
 * @param fileName name of the file to upload
 * @param timeout timeout of the request sent to the TaskExecutor
 * @return the future returned by the TaskExecutor, or a future completed exceptionally with
 *     an {@link UnknownTaskExecutorException} if no such TaskExecutor is registered
 */
@Override
public CompletableFuture requestTaskManagerFileUploadByName(
        ResourceID taskManagerId, String fileName, Time timeout) {
    log.debug(
            "Request upload of file {} from TaskExecutor {}.",
            fileName,
            taskManagerId.getStringWithMetadata());

    final WorkerRegistration registration = taskExecutors.get(taskManagerId);

    if (registration != null) {
        return registration.getTaskExecutorGateway().requestFileUploadByName(fileName, timeout);
    }

    log.debug(
            "Request upload of file {} from unregistered TaskExecutor {}.",
            fileName,
            taskManagerId.getStringWithMetadata());
    return FutureUtils.completedExceptionally(
            new UnknownTaskExecutorException(taskManagerId));
}
@Override
public CompletableFuture> requestTaskManagerLogList(
ResourceID taskManagerId, Time timeout) {
final WorkerRegistration taskExecutor = taskExecutors.get(taskManagerId);
if (taskExecutor == null) {
log.debug(
"Requested log list from unregistered TaskExecutor {}.",
taskManagerId.getStringWithMetadata());
return FutureUtils.completedExceptionally(
new UnknownTaskExecutorException(taskManagerId));
} else {
return taskExecutor.getTaskExecutorGateway().requestLogList(timeout);
}
}
/**
* Releases the cluster partitions of the given data set by delegating to the cluster partition
* tracker.
*
* @param dataSetId id of the data set whose cluster partitions should be released
* @return the future returned by the cluster partition tracker
*/
@Override
public CompletableFuture releaseClusterPartitions(IntermediateDataSetID dataSetId) {
return clusterPartitionTracker.releaseClusterPartitions(dataSetId);
}
@Override
public CompletableFuture