org.apache.flink.runtime.resourcemanager.ResourceManager Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.resourcemanager;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.blob.TransientBlobKey;
import org.apache.flink.runtime.clusterframework.ApplicationStatus;
import org.apache.flink.runtime.clusterframework.messages.InfoMessage;
import org.apache.flink.runtime.clusterframework.types.AllocationID;
import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.runtime.clusterframework.types.ResourceIDRetrievable;
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
import org.apache.flink.runtime.clusterframework.types.SlotID;
import org.apache.flink.runtime.concurrent.FutureUtils;
import org.apache.flink.runtime.entrypoint.ClusterInformation;
import org.apache.flink.runtime.heartbeat.HeartbeatListener;
import org.apache.flink.runtime.heartbeat.HeartbeatManager;
import org.apache.flink.runtime.heartbeat.HeartbeatServices;
import org.apache.flink.runtime.heartbeat.HeartbeatTarget;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.instance.HardwareDescription;
import org.apache.flink.runtime.instance.InstanceID;
import org.apache.flink.runtime.instance.TaskManagerResourceDescription;
import org.apache.flink.runtime.jobmaster.JobMaster;
import org.apache.flink.runtime.jobmaster.JobMasterGateway;
import org.apache.flink.runtime.jobmaster.JobMasterId;
import org.apache.flink.runtime.jobmaster.JobMasterRegistrationSuccess;
import org.apache.flink.runtime.leaderelection.LeaderContender;
import org.apache.flink.runtime.leaderelection.LeaderElectionService;
import org.apache.flink.runtime.messages.Acknowledge;
import org.apache.flink.runtime.metrics.MetricRegistry;
import org.apache.flink.runtime.metrics.dump.MetricQueryService;
import org.apache.flink.runtime.registration.RegistrationResponse;
import org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException;
import org.apache.flink.runtime.resourcemanager.exceptions.UnknownTaskExecutorException;
import org.apache.flink.runtime.resourcemanager.placementconstraint.PlacementConstraint;
import org.apache.flink.runtime.resourcemanager.placementconstraint.SlotTag;
import org.apache.flink.runtime.resourcemanager.registration.JobManagerRegistration;
import org.apache.flink.runtime.resourcemanager.registration.WorkerRegistration;
import org.apache.flink.runtime.resourcemanager.resultpartitionmaster.ResultPartitionMaster;
import org.apache.flink.runtime.resourcemanager.slotmanager.ResourceActions;
import org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager;
import org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerException;
import org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.FencedRpcEndpoint;
import org.apache.flink.runtime.rpc.RpcService;
import org.apache.flink.runtime.rpc.RpcTimeout;
import org.apache.flink.runtime.taskexecutor.ResultPartitionStatus;
import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
import org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess;
import org.apache.flink.runtime.taskexecutor.TaskExecutorResourceReport;
import org.apache.flink.runtime.util.FileOffsetRange;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkException;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeoutException;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* ResourceManager implementation. The resource manager is responsible for resource de-/allocation
* and bookkeeping.
*
* It offers the following methods as part of its rpc interface to interact with it remotely:
*
* - {@link #registerJobManager(JobMasterId, ResourceID, String, JobID, Time)} registers a {@link JobMaster} at the resource manager
* - {@link #requestSlot(JobMasterId, SlotRequest, Time)} requests a slot from the resource manager
*
*/
public abstract class ResourceManager
extends FencedRpcEndpoint
implements ResourceManagerGateway, LeaderContender {
// NOTE(review): several declarations below use raw types or are malformed (e.g. "Map>",
// "ConcurrentHashMap>"); generic type arguments appear to have been lost from this copy of
// the file and must be restored from the original before it can compile.
/**
 * Name constant of the resource manager.
 * NOTE(review): presumably the RPC endpoint name; its use is not visible here — confirm.
 */
public static final String RESOURCE_MANAGER_NAME = "resourcemanager";
/** Unique id of the resource manager. */
private final ResourceID resourceId;
/** Configuration of the resource manager. */
private final ResourceManagerConfiguration resourceManagerConfiguration;
/** Registrations ({@link JobManagerRegistration}) of all currently registered job managers, looked up by JobID. */
private final Map jobManagerRegistrations;
/** All currently registered JobMasterGateways scoped by ResourceID. */
private final Map jmResourceIdRegistrations;
/** Service to retrieve the job leader ids. */
private final JobLeaderIdService jobLeaderIdService;
/** All currently registered TaskExecutors with their framework specific worker information, looked up by ResourceID. */
private final Map> taskExecutors;
/** High availability services for leader retrieval and election. */
private final HighAvailabilityServices highAvailabilityServices;
/** The heartbeat manager with task managers. */
private final HeartbeatManager taskManagerHeartbeatManager;
/** The heartbeat manager with job managers. */
private final HeartbeatManager jobManagerHeartbeatManager;
/** Registry to use for metrics. */
private final MetricRegistry metricRegistry;
/** Fatal error handler. */
private final FatalErrorHandler fatalErrorHandler;
/** The slot manager maintains the available slots. */
protected final SlotManager slotManager;
/**
 * Tracks result partitions reported by task managers. Created in the constructor from the RPC
 * service's scheduled executor and the configured result-partition disconnect-JM timeout.
 */
protected final ResultPartitionMaster resultPartitionMaster;
/** Cluster information handed in through the constructor. */
private final ClusterInformation clusterInformation;
/** The service to elect a ResourceManager leader. Assigned in {@code start()}, so it is null before that. */
private LeaderElectionService leaderElectionService;
/** All registered listeners for status updates of the ResourceManager, keyed by listener address. */
private ConcurrentMap infoMessageListeners;
/** Value of {@code getMaxTotalCpuCore()} read from the resource manager configuration. */
protected final double maxTotalCpuCore;
/** Value of {@code getMaxTotalMemoryMb()} read from the resource manager configuration. */
protected final int maxTotalMemoryMb;
// The two maps below are created empty in the constructor; how they are populated is not
// visible in this part of the file.
// NOTE(review): presumably filled by subclasses when allocations exceed the limits above — confirm.
protected ConcurrentHashMap tryAllocateExceedLimitExceptions;
protected ConcurrentHashMap> taskManagerExceptions;
/**
 * Creates the resource manager endpoint and wires up its collaborators.
 *
 * <p>Besides storing the passed-in services, the constructor creates the
 * {@link ResultPartitionMaster}, one heartbeat manager for task managers and one for job
 * managers, and the initially empty registration and bookkeeping maps.
 *
 * @param rpcService RPC service; passed to the super constructor and used for its scheduled executor
 * @param resourceManagerEndpointId endpoint id passed to the super constructor
 * @param resourceId id of this resource manager; must not be null
 * @param resourceManagerConfiguration configuration (timeouts, resource limits); must not be null
 * @param highAvailabilityServices high availability services; must not be null
 * @param heartbeatServices factory for the two heartbeat managers; not null-checked here but
 *     dereferenced immediately
 * @param slotManager slot manager; must not be null
 * @param metricRegistry metric registry; must not be null
 * @param jobLeaderIdService service to retrieve job leader ids; must not be null
 * @param clusterInformation cluster information; must not be null
 * @param fatalErrorHandler handler for fatal errors; must not be null
 */
public ResourceManager(
RpcService rpcService,
String resourceManagerEndpointId,
ResourceID resourceId,
ResourceManagerConfiguration resourceManagerConfiguration,
HighAvailabilityServices highAvailabilityServices,
HeartbeatServices heartbeatServices,
SlotManager slotManager,
MetricRegistry metricRegistry,
JobLeaderIdService jobLeaderIdService,
ClusterInformation clusterInformation,
FatalErrorHandler fatalErrorHandler) {
super(rpcService, resourceManagerEndpointId);
this.resourceId = checkNotNull(resourceId);
this.resourceManagerConfiguration = checkNotNull(resourceManagerConfiguration);
this.highAvailabilityServices = checkNotNull(highAvailabilityServices);
this.slotManager = checkNotNull(slotManager);
// Reads the local parameter resourceManagerConfiguration, which was null-checked two lines above.
this.resultPartitionMaster = new ResultPartitionMaster(rpcService.getScheduledExecutor(),
resourceManagerConfiguration.getResultPartitionDisconnectJmTimeout());
this.metricRegistry = checkNotNull(metricRegistry);
this.jobLeaderIdService = checkNotNull(jobLeaderIdService);
this.clusterInformation = checkNotNull(clusterInformation);
this.fatalErrorHandler = checkNotNull(fatalErrorHandler);
// Both heartbeat managers are created as senders, share the scheduled executor and logger,
// and differ only in the listener that handles the respective peer type.
this.taskManagerHeartbeatManager = heartbeatServices.createHeartbeatManagerSender(
resourceId,
new TaskManagerHeartbeatListener(),
rpcService.getScheduledExecutor(),
log);
this.jobManagerHeartbeatManager = heartbeatServices.createHeartbeatManagerSender(
resourceId,
new JobManagerHeartbeatListener(),
rpcService.getScheduledExecutor(),
log);
// Registration tables start empty; the numeric arguments are initial capacities only.
this.jobManagerRegistrations = new HashMap<>(4);
this.jmResourceIdRegistrations = new HashMap<>(4);
this.taskExecutors = new HashMap<>(8);
infoMessageListeners = new ConcurrentHashMap<>(8);
// Resource limits are read once from the configuration.
maxTotalCpuCore = resourceManagerConfiguration.getMaxTotalCpuCore();
maxTotalMemoryMb = resourceManagerConfiguration.getMaxTotalMemoryMb();
tryAllocateExceedLimitExceptions = new ConcurrentHashMap<>();
taskManagerExceptions = new ConcurrentHashMap<>();
}
// ------------------------------------------------------------------------
// RPC lifecycle methods
// ------------------------------------------------------------------------
/**
 * Starts the resource manager.
 *
 * <p>Order: the superclass endpoint is started, then the resource manager leader election
 * service obtained from the high availability services is started with this instance as the
 * contender, then the job leader id service is started, and finally {@code initialize()} is
 * called.
 *
 * @throws ResourceManagerException if the leader election service or the job leader id
 *     service cannot be started (the original exception is kept as the cause)
 * @throws Exception anything propagated from {@code super.start()} or {@code initialize()}
 */
@Override
public void start() throws Exception {
// start the underlying endpoint before taking part in leader election
super.start();
leaderElectionService = highAvailabilityServices.getResourceManagerLeaderElectionService();
try {
leaderElectionService.start(this);
} catch (Exception e) {
throw new ResourceManagerException("Could not start the leader election service.", e);
}
try {
jobLeaderIdService.start(new JobLeaderIdActionsImpl());
} catch (Exception e) {
throw new ResourceManagerException("Could not start the job leader id service.", e);
}
// initialization hook defined outside this part of the file
initialize();
}
/**
 * Shuts down the services owned by the resource manager.
 *
 * <p>Both heartbeat managers are stopped first. Afterwards the slot manager, the result
 * partition master, the leader election service and the job leader id service are closed or
 * stopped one after another; a failure in one of them does not prevent the remaining ones from
 * being shut down. Failures are folded together with {@code ExceptionUtils.firstOrSuppressed}.
 * Finally {@code clearState()} is called.
 *
 * @return a completed future if every step succeeded, otherwise a future completed
 *     exceptionally with a {@link FlinkException} wrapping the collected failure
 */
@Override
public CompletableFuture postStop() {
    Exception shutdownFailure = null;

    taskManagerHeartbeatManager.stop();
    jobManagerHeartbeatManager.stop();

    try {
        slotManager.close();
    } catch (Exception slotManagerFailure) {
        shutdownFailure = ExceptionUtils.firstOrSuppressed(slotManagerFailure, shutdownFailure);
    }

    try {
        resultPartitionMaster.close();
    } catch (Exception partitionMasterFailure) {
        shutdownFailure = ExceptionUtils.firstOrSuppressed(partitionMasterFailure, shutdownFailure);
    }

    try {
        leaderElectionService.stop();
    } catch (Exception leaderElectionFailure) {
        shutdownFailure = ExceptionUtils.firstOrSuppressed(leaderElectionFailure, shutdownFailure);
    }

    try {
        jobLeaderIdService.stop();
    } catch (Exception jobLeaderIdFailure) {
        shutdownFailure = ExceptionUtils.firstOrSuppressed(jobLeaderIdFailure, shutdownFailure);
    }

    clearState();

    if (shutdownFailure == null) {
        return CompletableFuture.completedFuture(null);
    }

    return FutureUtils.completedExceptionally(
        new FlinkException("Could not properly shut down the ResourceManager.", shutdownFailure));
}
// ------------------------------------------------------------------------
// RPC methods
// ------------------------------------------------------------------------
/**
 * Registers a {@link JobMaster} for the given job at this resource manager.
 *
 * <p>The job is first added to the job leader id service if it is not yet known there. The
 * registration is accepted only if the leader id reported by that service equals
 * {@code jobMasterId}. If the ids differ, or if connecting to the job manager or resolving the
 * leader id fails, the returned future completes normally with a
 * {@link RegistrationResponse.Decline}.
 *
 * @param jobMasterId leader id the job manager registers with; must not be null
 * @param jobManagerResourceId resource id of the job manager; must not be null
 * @param jobManagerAddress RPC address of the job manager; must not be null
 * @param jobId id of the job the job manager is responsible for; must not be null
 * @param timeout not used by the method body
 * @return future with the registration response; it is completed exceptionally (after the
 *     fatal error handler was notified) only if the job cannot be added to the job leader id
 *     service or the leader id future cannot be obtained from it
 */
@Override
public CompletableFuture registerJobManager(
        final JobMasterId jobMasterId,
        final ResourceID jobManagerResourceId,
        final String jobManagerAddress,
        final JobID jobId,
        final Time timeout) {

    checkNotNull(jobMasterId);
    checkNotNull(jobManagerResourceId);
    checkNotNull(jobManagerAddress);
    checkNotNull(jobId);

    if (!jobLeaderIdService.containsJob(jobId)) {
        try {
            jobLeaderIdService.addJob(jobId);
        } catch (Exception e) {
            ResourceManagerException exception = new ResourceManagerException("Could not add the job " +
                jobId + " to the job id leader service.", e);

            onFatalError(exception);

            log.error("Could not add job {} to job leader id service.", jobId, e);
            return FutureUtils.completedExceptionally(exception);
        }
    }

    log.info("Registering job manager {}@{} for job {}.", jobMasterId, jobManagerAddress, jobId);

    CompletableFuture jobMasterIdFuture;

    try {
        jobMasterIdFuture = jobLeaderIdService.getLeaderId(jobId);
    } catch (Exception e) {
        // we cannot check the job leader id so let's fail
        // TODO: Maybe it's also ok to skip this check in case that we cannot check the leader id
        ResourceManagerException exception = new ResourceManagerException("Cannot obtain the " +
            "job leader id future to verify the correct job leader.", e);

        onFatalError(exception);

        log.debug("Could not obtain the job leader id future to verify the correct job leader.");
        return FutureUtils.completedExceptionally(exception);
    }

    CompletableFuture jobMasterGatewayFuture = getRpcService().connect(jobManagerAddress, jobMasterId, JobMasterGateway.class);

    // The comparison and the actual registration run in the main thread executor.
    CompletableFuture registrationResponseFuture = jobMasterGatewayFuture.thenCombineAsync(
        jobMasterIdFuture,
        (JobMasterGateway jobMasterGateway, JobMasterId currentJobMasterId) -> {
            if (Objects.equals(currentJobMasterId, jobMasterId)) {
                return registerJobMasterInternal(
                    jobMasterGateway,
                    jobId,
                    jobManagerAddress,
                    jobManagerResourceId);
            } else {
                // Argument order matches the placeholders: current leader id first, received id second.
                log.debug("The current JobMaster leader id {} did not match the received " +
                    "JobMaster id {}.", currentJobMasterId, jobMasterId);
                return new RegistrationResponse.Decline("Job manager leader id did not match.");
            }
        },
        getMainThreadExecutor());

    // handle exceptions which might have occurred in one of the futures inputs of combine
    return registrationResponseFuture.handleAsync(
        (RegistrationResponse registrationResponse, Throwable throwable) -> {
            if (throwable != null) {
                if (log.isDebugEnabled()) {
                    log.debug("Registration of job manager {}@{} failed.", jobMasterId, jobManagerAddress, throwable);
                } else {
                    log.info("Registration of job manager {}@{} failed.", jobMasterId, jobManagerAddress);
                }

                return new RegistrationResponse.Decline(throwable.getMessage());
            } else {
                return registrationResponse;
            }
        },
        getRpcService().getExecutor());
}
/**
 * Registers a task executor at this resource manager.
 *
 * <p>A gateway to the task executor is resolved through the RPC service. Once that future
 * completes, the result is handled in the main thread executor: a connection failure yields a
 * {@link RegistrationResponse.Decline} carrying the failure message, a successful connection is
 * passed on to {@code registerTaskExecutorInternal}.
 *
 * @param taskExecutorAddress RPC address of the task executor
 * @param taskExecutorResourceId resource id of the task executor
 * @param dataPort data port of the task executor
 * @param hardwareDescription hardware description of the task executor
 * @param timeout not used by the method body
 * @return future with the registration response
 */
@Override
public CompletableFuture registerTaskExecutor(
        final String taskExecutorAddress,
        final ResourceID taskExecutorResourceId,
        final int dataPort,
        final HardwareDescription hardwareDescription,
        final Time timeout) {

    final CompletableFuture gatewayFuture =
        getRpcService().connect(taskExecutorAddress, TaskExecutorGateway.class);

    return gatewayFuture.handleAsync(
        (TaskExecutorGateway taskExecutorGateway, Throwable connectFailure) -> {
            if (connectFailure == null) {
                return registerTaskExecutorInternal(
                    taskExecutorGateway,
                    taskExecutorAddress,
                    taskExecutorResourceId,
                    dataPort,
                    hardwareDescription);
            }

            return new RegistrationResponse.Decline(connectFailure.getMessage());
        },
        getMainThreadExecutor());
}
/**
 * Accepts the resource report of a registered task executor and hands its slot report to the
 * slot manager and its result partition report to the result partition master.
 *
 * @param taskManagerResourceId resource id of the reporting task executor
 * @param taskManagerRegistrationId instance id the task executor received at registration
 * @param taskExecutorResourceReport report containing the slot and result partition reports
 * @param timeout not used by the method body
 * @return future completed with an acknowledge on success; completed exceptionally with an
 *     {@link UnknownTaskExecutorException} if no task executor is registered under the resource
 *     id, or with a {@link ResourceManagerException} if the registration id does not match
 */
@Override
public CompletableFuture sendTaskExecutorResourceReport(
        ResourceID taskManagerResourceId,
        InstanceID taskManagerRegistrationId,
        TaskExecutorResourceReport taskExecutorResourceReport,
        Time timeout) {

    final WorkerRegistration workerTypeWorkerRegistration = taskExecutors.get(taskManagerResourceId);

    // A report can arrive for a task executor that is not (or no longer) registered; fail the
    // request instead of dereferencing null.
    if (workerTypeWorkerRegistration == null) {
        log.debug("Received resource report from unregistered TaskExecutor {}.", taskManagerResourceId);
        return FutureUtils.completedExceptionally(new UnknownTaskExecutorException(taskManagerResourceId));
    }

    if (workerTypeWorkerRegistration.getInstanceID().equals(taskManagerRegistrationId)) {
        slotManager.registerTaskManager(workerTypeWorkerRegistration, taskExecutorResourceReport.getSlotReport());
        resultPartitionMaster.registerTaskManager(workerTypeWorkerRegistration, taskExecutorResourceReport.getResultPartitionReport());
        return CompletableFuture.completedFuture(Acknowledge.get());
    } else {
        return FutureUtils.completedExceptionally(new ResourceManagerException(String.format("Unknown TaskManager registration id %s.", taskManagerRegistrationId)));
    }
}
/**
 * Records a finished result partition reported by a task executor.
 *
 * <p>The notification is forwarded to the result partition master only if a task executor is
 * registered under {@code resourceId} and its registered instance id equals {@code instanceId}.
 * Otherwise the notification is dropped and a debug message is logged.
 *
 * @param resourceId resource id of the reporting task executor
 * @param instanceId instance id the task executor received at registration
 * @param resultPartitionStatus status of the finished result partition
 */
@Override
public void notifyResultPartitionFinished(
        ResourceID resourceId,
        InstanceID instanceId,
        ResultPartitionStatus resultPartitionStatus) {

    WorkerRegistration registration = taskExecutors.get(resourceId);

    if (registration == null) {
        log.debug("Could not find registration for resource id {}. Discarding the result partition " +
            "finished message {}.", resourceId, resultPartitionStatus);
        return;
    }

    InstanceID registrationId = registration.getInstanceID();

    if (Objects.equals(registrationId, instanceId)) {
        resultPartitionMaster.addFinishedResultPartition(instanceId, resultPartitionStatus);
    } else {
        log.debug("Invalid registration id for result partition finished message. This indicates an" +
            " outdated request.");
    }
}
/**
 * Forwards a heartbeat received from a task manager, with its resource report as payload, to
 * the task manager heartbeat manager.
 *
 * @param resourceID resource id of the task manager that sent the heartbeat
 * @param taskExecutorResourceReport resource report sent along with the heartbeat
 */
@Override
public void heartbeatFromTaskManager(final ResourceID resourceID, final TaskExecutorResourceReport taskExecutorResourceReport) {
taskManagerHeartbeatManager.receiveHeartbeat(resourceID, taskExecutorResourceReport);
}
/**
 * Forwards a heartbeat received from a job manager to the job manager heartbeat manager.
 *
 * @param resourceID resource id of the job manager that sent the heartbeat
 */
@Override
public void heartbeatFromJobManager(final ResourceID resourceID) {
// job manager heartbeats carry no payload
jobManagerHeartbeatManager.receiveHeartbeat(resourceID, null);
}
/**
 * Disconnects a task manager by delegating to {@code closeTaskManagerConnection}.
 *
 * @param resourceId resource id of the task manager to disconnect
 * @param cause reason for the disconnect
 */
@Override
public void disconnectTaskManager(final ResourceID resourceId, final Exception cause) {
closeTaskManagerConnection(resourceId, cause);
}
/**
 * Disconnects the job manager of a job by delegating to {@code closeJobManagerConnection}.
 *
 * @param jobId id of the job whose job manager is disconnected
 * @param cause reason for the disconnect
 */
@Override
public void disconnectJobManager(final JobID jobId, final Exception cause) {
closeJobManagerConnection(jobId, cause);
}
/**
 * Asks the result partition master to release the result partitions of the given job.
 *
 * @param jobId id of the job whose result partitions are released
 */
@Override
public void releaseResultPartitions(JobID jobId) {
resultPartitionMaster.releaseResultPartitions(jobId);
}
/**
 * Passes the placement constraints of a job on to the slot manager.
 *
 * @param jobId id of the job the constraints belong to
 * @param constraints placement constraints handed to {@code slotManager.setJobConstraints}
 * @param timeout RPC timeout; not used by the method body
 * @return an already completed future holding an acknowledge
 */
@Override
public CompletableFuture setPlacementConstraints(
JobID jobId,
List constraints,
@RpcTimeout Time timeout) {
slotManager.setJobConstraints(jobId, constraints);
return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
 * Requests a slot from the resource manager on behalf of a job manager.
 *
 * <p>The request is registered at the slot manager only if a job manager is registered for the
 * request's job and its job master id equals {@code jobMasterId}.
 *
 * @param jobMasterId leader id of the requesting job manager
 * @param slotRequest the slot request; its job id selects the job manager registration
 * @param timeout not used by the method body
 * @return future completed with an acknowledge once the request is registered; completed
 *     exceptionally with a {@link ResourceManagerException} if no job manager is registered for
 *     the job or the leader ids differ, or with the {@link SlotManagerException} thrown by the
 *     slot manager
 */
@Override
public CompletableFuture requestSlot(
        JobMasterId jobMasterId,
        SlotRequest slotRequest,
        final Time timeout) {

    final JobID jobId = slotRequest.getJobId();
    final JobManagerRegistration registration = jobManagerRegistrations.get(jobId);

    // No job manager known for this job.
    if (registration == null) {
        return FutureUtils.completedExceptionally(new ResourceManagerException("Could not find registered job manager for job " + jobId + '.'));
    }

    // Request comes from a job manager that is not the registered leader.
    if (!Objects.equals(jobMasterId, registration.getJobMasterId())) {
        return FutureUtils.completedExceptionally(new ResourceManagerException("The job leader's id " +
            registration.getJobMasterId() + " does not match the received id " + jobMasterId + '.'));
    }

    log.info("Request slot with profile {} for job {} with allocation id {}.",
        slotRequest.getResourceProfile(),
        slotRequest.getJobId(),
        slotRequest.getAllocationId());

    try {
        slotManager.registerSlotRequest(slotRequest);
    } catch (SlotManagerException registrationFailure) {
        return FutureUtils.completedExceptionally(registrationFailure);
    }

    return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
 * Requests several slots from the resource manager in one call.
 *
 * <p>The batch is handed to {@code slotManager.registerSlotRequests} only if a job manager is
 * registered for the job of the first request and its job master id equals {@code jobMasterId}.
 *
 * <p>NOTE(review): the return type and the type of {@code registerResults} are malformed in
 * this copy of the file ("CompletableFuture>>", "List>"); the generic type arguments appear to
 * have been lost and must be restored from the original.
 *
 * <p>NOTE(review): only the job id of the first request is used for the registration lookup;
 * presumably all requests of a batch belong to the same job — confirm against the callers.
 *
 * @param jobMasterId leader id of the requesting job manager
 * @param slotRequests the slot requests; a null or empty list is rejected by {@code checkArgument}
 * @param timeout not used by the method body
 * @return future with the results returned by the slot manager; completed exceptionally with a
 *     {@link ResourceManagerException} if no job manager is registered for the job or the
 *     leader ids differ
 */
@Override
public CompletableFuture>> requestSlots(
JobMasterId jobMasterId,
List slotRequests,
final Time timeout) {
checkArgument(slotRequests != null && !slotRequests.isEmpty(), "Should contain at least one slot request");
// the first request determines which job manager registration is checked
JobID jobId = slotRequests.get(0).getJobId();
JobManagerRegistration jobManagerRegistration = jobManagerRegistrations.get(jobId);
if (null != jobManagerRegistration) {
if (Objects.equals(jobMasterId, jobManagerRegistration.getJobMasterId())) {
log.info("Requesting {} slots from resource manager.", slotRequests.size());
for (SlotRequest slotRequest : slotRequests) {
log.info("Request slot with profile {} for job {} with allocation id {}.",
slotRequest.getResourceProfile(),
slotRequest.getJobId(),
slotRequest.getAllocationId());
}
List> registerResults = slotManager.registerSlotRequests(slotRequests);
return CompletableFuture.completedFuture(registerResults);
} else {
return FutureUtils.completedExceptionally(new ResourceManagerException("The job leader's id " +
jobManagerRegistration.getJobMasterId() + " does not match the received id " + jobMasterId + '.'));
}
} else {
return FutureUtils.completedExceptionally(new ResourceManagerException("Could not find registered job manager for job " + jobId + '.'));
}
}
/**
 * Cancels a pending slot request by unregistering it from the slot manager.
 *
 * @param allocationID allocation id of the slot request to cancel
 */
@Override
public void cancelSlotRequest(AllocationID allocationID) {
// Slot allocation is asynchronous, so not every redundant slot can be avoided; this is a
// best-effort cancellation.
slotManager.unregisterSlotRequest(allocationID);
}
/**
 * Handles the notification of a task executor that one of its slots is available again.
 *
 * <p>The slot is freed at the slot manager only if a task executor is registered under the
 * slot's resource id and its registered instance id equals {@code instanceID}. Otherwise the
 * notification is dropped and a debug message is logged.
 *
 * @param instanceID instance id the task executor received at registration
 * @param slotId id of the slot that became available
 * @param allocationId allocation id the slot was allocated with
 */
@Override
public void notifySlotAvailable(
        final InstanceID instanceID,
        final SlotID slotId,
        final AllocationID allocationId) {

    final ResourceID resourceId = slotId.getResourceID();

    WorkerRegistration registration = taskExecutors.get(resourceId);

    if (registration == null) {
        log.debug("Could not find registration for resource id {}. Discarding the slot available " +
            "message {}.", resourceId, slotId);
        return;
    }

    InstanceID registrationId = registration.getInstanceID();

    if (Objects.equals(registrationId, instanceID)) {
        slotManager.freeSlot(slotId, allocationId);
    } else {
        log.debug("Invalid registration id for slot available message. This indicates an" +
            " outdated request.");
    }
}
/**
 * Registers an info message listener.
 *
 * <p>A listener whose address is already registered is ignored with a warning. Otherwise a
 * gateway to the listener is resolved through the RPC service; when that completes, the
 * outcome is handled in the main thread executor: the gateway is stored under its address on
 * success, and a warning is logged on failure.
 *
 * @param address address of infoMessage listener to register to this resource manager
 */
@Override
public void registerInfoMessageListener(final String address) {
    if (infoMessageListeners.containsKey(address)) {
        log.warn("Receive a duplicate registration from info message listener on ({})", address);
        return;
    }

    final CompletableFuture listenerGatewayFuture =
        getRpcService().connect(address, InfoMessageListenerRpcGateway.class);

    listenerGatewayFuture.whenCompleteAsync(
        (InfoMessageListenerRpcGateway gateway, Throwable failure) -> {
            if (failure == null) {
                log.info("Receive a registration from info message listener on ({})", address);
                infoMessageListeners.put(address, gateway);
            } else {
                log.warn("Receive a registration from unreachable info message listener on ({})", address);
            }
        },
        getMainThreadExecutor());
}
/**
 * Unregisters an info message listener by removing the entry stored under its address.
 *
 * @param address address of the info message listener to unregister from this resource manager
 */
@Override
public void unRegisterInfoMessageListener(final String address) {
infoMessageListeners.remove(address);
}
/**
 * Cleanup application and shut down cluster.
 *
 * <p>Delegates to {@code internalDeregisterApplication}. A {@link ResourceManagerException}
 * thrown by it is logged as a warning and not propagated, so the caller receives an
 * acknowledge in that case as well.
 *
 * @param finalStatus of the Flink application
 * @param diagnostics diagnostics message for the Flink application or {@code null}
 * @return an already completed future holding an acknowledge
 */
@Override
public CompletableFuture deregisterApplication(
final ApplicationStatus finalStatus,
@Nullable final String diagnostics) {
log.info("Shut down cluster because application is in {}, diagnostics {}.", finalStatus, diagnostics);
try {
internalDeregisterApplication(finalStatus, diagnostics);
} catch (ResourceManagerException e) {
// the failure is only logged; the method still acknowledges
log.warn("Could not properly shutdown the application.", e);
}
return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
 * Returns the number of currently registered task executors.
 *
 * @return an already completed future holding the size of the task executor table
 */
@Override
public CompletableFuture getNumberOfRegisteredTaskManagers() {
return CompletableFuture.completedFuture(taskExecutors.size());
}
/**
 * Collects a {@link TaskManagerInfo} for every registered task executor.
 *
 * <p>Each entry combines the gateway address, data port and hardware description from the
 * worker registration with the last heartbeat time from the task manager heartbeat manager and
 * the slot counts and total/available resources from the slot manager.
 *
 * <p>NOTE(review): the return type and the loop variable type are malformed in this copy of
 * the file ("CompletableFuture>", "Map.Entry>"); the generic type arguments appear to have been
 * lost and must be restored from the original.
 *
 * @param timeout not used by the method body
 * @return an already completed future holding one info object per registered task executor
 */
@Override
public CompletableFuture> requestTaskManagerInfo(Time timeout) {
final ArrayList taskManagerInfos = new ArrayList<>(taskExecutors.size());
for (Map.Entry> taskExecutorEntry : taskExecutors.entrySet()) {
final ResourceID resourceId = taskExecutorEntry.getKey();
final WorkerRegistration taskExecutor = taskExecutorEntry.getValue();
taskManagerInfos.add(
new TaskManagerInfo(
resourceId,
taskExecutor.getTaskExecutorGateway().getAddress(),
taskExecutor.getDataPort(),
taskManagerHeartbeatManager.getLastHeartbeatFrom(resourceId),
slotManager.getNumberRegisteredSlotsOf(taskExecutor.getInstanceID()),
slotManager.getNumberFreeSlotsOf(taskExecutor.getInstanceID()),
taskExecutor.getHardwareDescription(),
TaskManagerResourceDescription.fromResourceProfile(slotManager.getTotalResourceOf(resourceId)),
TaskManagerResourceDescription.fromResourceProfile(slotManager.getAvailableResourceOf(resourceId))));
}
return CompletableFuture.completedFuture(taskManagerInfos);
}
/**
 * Builds the {@link TaskManagerInfo} of a single registered task executor.
 *
 * <p>The info combines the gateway address, data port and hardware description from the worker
 * registration with the last heartbeat time from the task manager heartbeat manager and the
 * slot counts and total/available resources from the slot manager.
 *
 * @param resourceId resource id of the task executor
 * @param timeout not used by the method body
 * @return an already completed future holding the info, or a future completed exceptionally
 *     with an {@link UnknownTaskExecutorException} if no task executor is registered under
 *     {@code resourceId}
 */
@Override
public CompletableFuture requestTaskManagerInfo(ResourceID resourceId, Time timeout) {
    final WorkerRegistration registration = taskExecutors.get(resourceId);

    if (registration == null) {
        return FutureUtils.completedExceptionally(new UnknownTaskExecutorException(resourceId));
    }

    final InstanceID instanceId = registration.getInstanceID();

    final TaskManagerInfo info = new TaskManagerInfo(
        resourceId,
        registration.getTaskExecutorGateway().getAddress(),
        registration.getDataPort(),
        taskManagerHeartbeatManager.getLastHeartbeatFrom(resourceId),
        slotManager.getNumberRegisteredSlotsOf(instanceId),
        slotManager.getNumberFreeSlotsOf(instanceId),
        registration.getHardwareDescription(),
        TaskManagerResourceDescription.fromResourceProfile(slotManager.getTotalResourceOf(resourceId)),
        TaskManagerResourceDescription.fromResourceProfile(slotManager.getAvailableResourceOf(resourceId)));

    return CompletableFuture.completedFuture(info);
}
/**
 * Summarizes the resources currently known to the resource manager.
 *
 * <p>The overview consists of the number of registered task executors plus the registered and
 * free slot counts and the total and available resources reported by the slot manager.
 *
 * @param timeout not used by the method body
 * @return an already completed future holding the {@code ResourceOverview}
 */
@Override
public CompletableFuture requestResourceOverview(Time timeout) {
    final int registeredSlots = slotManager.getNumberRegisteredSlots();
    final int freeSlots = slotManager.getNumberFreeSlots();

    final ResourceOverview overview = new ResourceOverview(
        taskExecutors.size(),
        registeredSlots,
        freeSlots,
        slotManager.getTotalResource(),
        slotManager.getAvailableResource());

    return CompletableFuture.completedFuture(overview);
}
/**
 * Computes the metric query service path of every registered task executor.
 *
 * <p>The path is derived from the task executor gateway address: everything up to and
 * including the last '/' is kept and followed by the metric query service name, an underscore
 * and the resource id string of the task executor.
 *
 * <p>NOTE(review): the return type, the list type and the loop variable type are malformed in
 * this copy of the file ("CompletableFuture>>", "ArrayList>", "Map.Entry>"); the generic type
 * arguments appear to have been lost and must be restored from the original.
 *
 * @param timeout not used by the method body
 * @return an already completed future holding (resource id, path) pairs, one per registered
 *     task executor
 */
@Override
public CompletableFuture>> requestTaskManagerMetricQueryServicePaths(Time timeout) {
final ArrayList> metricQueryServicePaths = new ArrayList<>(taskExecutors.size());
for (Map.Entry> workerRegistrationEntry : taskExecutors.entrySet()) {
final ResourceID tmResourceId = workerRegistrationEntry.getKey();
final WorkerRegistration workerRegistration = workerRegistrationEntry.getValue();
final String taskManagerAddress = workerRegistration.getTaskExecutorGateway().getAddress();
// lastIndexOf returns -1 when the address has no '/', which makes the kept prefix empty
final String tmMetricQueryServicePath = taskManagerAddress.substring(0, taskManagerAddress.lastIndexOf('/') + 1) +
MetricQueryService.METRIC_QUERY_SERVICE_NAME + '_' + tmResourceId.getResourceIdString();
metricQueryServicePaths.add(Tuple2.of(tmResourceId, tmMetricQueryServicePath));
}
return CompletableFuture.completedFuture(metricQueryServicePaths);
}
/**
 * Asks a registered task executor to upload (a range of) one of its files.
 *
 * @param taskManagerId resource id of the task executor
 * @param filename name of the file to upload
 * @param fileOffsetRange range of the file to upload
 * @param timeout timeout passed on to the task executor gateway
 * @return the future returned by the task executor gateway, or a future completed
 *     exceptionally with an {@link UnknownTaskExecutorException} if no task executor is
 *     registered under {@code taskManagerId}
 */
@Override
public CompletableFuture requestTaskManagerFileUpload(ResourceID taskManagerId, String filename, FileOffsetRange fileOffsetRange, Time timeout) {
    log.debug("Request file {} with {} upload from TaskExecutor {}.", filename, fileOffsetRange, taskManagerId);

    final WorkerRegistration registration = taskExecutors.get(taskManagerId);

    if (registration != null) {
        return registration.getTaskExecutorGateway().requestFileUpload(filename, fileOffsetRange, timeout);
    }

    log.debug("Requested file {} upload from unregistered TaskExecutor {}.", filename, taskManagerId);
    return FutureUtils.completedExceptionally(new UnknownTaskExecutorException(taskManagerId));
}
/**
 * Asks a registered task executor to upload (a range of) one of its files by delegating to the
 * gateway method of the same name.
 *
 * <p>NOTE(review): the return type is malformed in this copy of the file
 * ("CompletableFuture>"); the generic type arguments appear to have been lost and must be
 * restored from the original.
 *
 * @param taskManagerId resource id of the task executor
 * @param filename name of the file to upload
 * @param fileOffsetRange range of the file to upload
 * @param timeout timeout passed on to the task executor gateway
 * @return the future returned by the task executor gateway, or a future completed
 *     exceptionally with an {@link UnknownTaskExecutorException} if no task executor is
 *     registered under {@code taskManagerId}
 */
@Override
public CompletableFuture> requestTaskManagerFileUploadReturnLength(ResourceID taskManagerId, String filename, FileOffsetRange fileOffsetRange, Time timeout) {
log.debug("Request file {} with {} upload from TaskExecutor {}.", filename, fileOffsetRange, taskManagerId);
final WorkerRegistration taskExecutor = taskExecutors.get(taskManagerId);
if (taskExecutor == null) {
log.debug("Requested file {} upload from unregistered TaskExecutor {}.", filename, taskManagerId);
return FutureUtils.completedExceptionally(new UnknownTaskExecutorException(taskManagerId));
} else {
return taskExecutor.getTaskExecutorGateway().requestTaskManagerFileUploadReturnLength(filename, fileOffsetRange, timeout);
}
}
/**
 * Requests the log list of a registered task executor by delegating to
 * {@code requestLogList} on its gateway.
 *
 * <p>NOTE(review): the return type is malformed in this copy of the file
 * ("CompletableFuture>>"); the generic type arguments appear to have been lost and must be
 * restored from the original.
 *
 * @param taskManagerId resource id of the task executor
 * @param timeout timeout passed on to the task executor gateway
 * @return the future returned by the task executor gateway, or a future completed
 *     exceptionally with an {@link UnknownTaskExecutorException} if no task executor is
 *     registered under {@code taskManagerId}
 */
@Override
public CompletableFuture>> requestTaskManagerLogList(ResourceID taskManagerId, Time timeout) {
final WorkerRegistration taskExecutor = taskExecutors.get(taskManagerId);
if (taskExecutor == null) {
log.debug("Requested historical loglist from unregistered TaskExecutor {}.", taskManagerId);
return FutureUtils.completedExceptionally(new UnknownTaskExecutorException(taskManagerId));
} else {
return taskExecutor.getTaskExecutorGateway().requestLogList(timeout);
}
}
/**
 * Requests the JMX information of a registered task executor by delegating to
 * {@code requestJmx} on its gateway.
 *
 * <p>NOTE(review): the return type is malformed in this copy of the file
 * ("CompletableFuture>"); the generic type arguments appear to have been lost and must be
 * restored from the original.
 *
 * @param taskManagerId resource id of the task executor
 * @param timeout timeout passed on to the task executor gateway
 * @return the future returned by the task executor gateway, or a future completed
 *     exceptionally with an {@link UnknownTaskExecutorException} if no task executor is
 *     registered under {@code taskManagerId}
 */
@Override
public CompletableFuture> requestJmx(ResourceID taskManagerId, Time timeout) {
final WorkerRegistration taskExecutor = taskExecutors.get(taskManagerId);
if (taskExecutor == null) {
// this message is logged only when the task executor is NOT registered
log.debug("Requested jmx server information for TaskExecutor {}.", taskManagerId);
return FutureUtils.completedExceptionally(new UnknownTaskExecutorException(taskManagerId));
} else {
return taskExecutor.getTaskExecutorGateway().requestJmx(timeout);
}
}
/**
 * Requests the log and stdout file names of a registered task executor by delegating to
 * {@code requestTmLogAndStdoutFileName} on its gateway.
 *
 * <p>NOTE(review): the return type is malformed in this copy of the file
 * ("CompletableFuture>"); the generic type arguments appear to have been lost and must be
 * restored from the original.
 *
 * @param taskManagerId resource id of the task executor
 * @param timeout timeout passed on to the task executor gateway
 * @return the future returned by the task executor gateway, or a future completed
 *     exceptionally with an {@link UnknownTaskExecutorException} if no task executor is
 *     registered under {@code taskManagerId}
 */
@Override
public CompletableFuture> requestTmLogAndStdoutFileName(ResourceID taskManagerId, Time timeout) {
final WorkerRegistration taskExecutor = taskExecutors.get(taskManagerId);
if (taskExecutor == null) {
// this message is logged only when the task executor is NOT registered
log.debug("Requested log and stdout file name for TaskExecutor {}.", taskManagerId);
return FutureUtils.completedExceptionally(new UnknownTaskExecutorException(taskManagerId));
} else {
return taskExecutor.getTaskExecutorGateway().requestTmLogAndStdoutFileName(timeout);
}
}
@Override
public CompletableFuture