All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.taskexecutor.TaskExecutor Maven / Gradle / Ivy

There is a newer version: 1.19.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.taskexecutor;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.ClusterOptions;
import org.apache.flink.management.jmx.JMXService;
import org.apache.flink.runtime.accumulators.AccumulatorSnapshot;
import org.apache.flink.runtime.blob.JobPermanentBlobService;
import org.apache.flink.runtime.blob.TaskExecutorBlobService;
import org.apache.flink.runtime.blob.TransientBlobKey;
import org.apache.flink.runtime.blob.TransientBlobService;
import org.apache.flink.runtime.checkpoint.CheckpointException;
import org.apache.flink.runtime.checkpoint.CheckpointFailureReason;
import org.apache.flink.runtime.checkpoint.CheckpointOptions;
import org.apache.flink.runtime.checkpoint.JobManagerTaskRestore;
import org.apache.flink.runtime.clusterframework.types.AllocationID;
import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
import org.apache.flink.runtime.clusterframework.types.SlotID;
import org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor;
import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
import org.apache.flink.runtime.entrypoint.ClusterInformation;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.execution.librarycache.LibraryCacheManager;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.executiongraph.JobInformation;
import org.apache.flink.runtime.executiongraph.PartitionInfo;
import org.apache.flink.runtime.executiongraph.TaskInformation;
import org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider;
import org.apache.flink.runtime.filecache.FileCache;
import org.apache.flink.runtime.heartbeat.HeartbeatListener;
import org.apache.flink.runtime.heartbeat.HeartbeatManager;
import org.apache.flink.runtime.heartbeat.HeartbeatReceiver;
import org.apache.flink.runtime.heartbeat.HeartbeatServices;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.instance.HardwareDescription;
import org.apache.flink.runtime.instance.InstanceID;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionInfo;
import org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider;
import org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway;
import org.apache.flink.runtime.jobmaster.AllocatedSlotInfo;
import org.apache.flink.runtime.jobmaster.AllocatedSlotReport;
import org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection;
import org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess;
import org.apache.flink.runtime.jobmaster.JobMasterGateway;
import org.apache.flink.runtime.jobmaster.JobMasterId;
import org.apache.flink.runtime.jobmaster.ResourceManagerAddress;
import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener;
import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
import org.apache.flink.runtime.memory.MemoryManager;
import org.apache.flink.runtime.memory.SharedResources;
import org.apache.flink.runtime.messages.Acknowledge;
import org.apache.flink.runtime.messages.TaskThreadInfoResponse;
import org.apache.flink.runtime.messages.ThreadInfoSample;
import org.apache.flink.runtime.metrics.MetricNames;
import org.apache.flink.runtime.metrics.groups.TaskManagerJobMetricGroup;
import org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup;
import org.apache.flink.runtime.metrics.groups.TaskMetricGroup;
import org.apache.flink.runtime.operators.coordination.OperatorEvent;
import org.apache.flink.runtime.operators.coordination.TaskNotRunningException;
import org.apache.flink.runtime.query.KvStateClientProxy;
import org.apache.flink.runtime.query.KvStateRegistry;
import org.apache.flink.runtime.query.KvStateServer;
import org.apache.flink.runtime.registration.RegistrationConnectionListener;
import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway;
import org.apache.flink.runtime.resourcemanager.ResourceManagerId;
import org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration;
import org.apache.flink.runtime.rest.messages.LogInfo;
import org.apache.flink.runtime.rest.messages.ThreadDumpInfo;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.RpcEndpoint;
import org.apache.flink.runtime.rpc.RpcService;
import org.apache.flink.runtime.rpc.RpcServiceUtils;
import org.apache.flink.runtime.security.token.DelegationTokenReceiverRepository;
import org.apache.flink.runtime.shuffle.ShuffleDescriptor;
import org.apache.flink.runtime.shuffle.ShuffleEnvironment;
import org.apache.flink.runtime.state.TaskExecutorChannelStateExecutorFactoryManager;
import org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager;
import org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager;
import org.apache.flink.runtime.state.TaskLocalStateStore;
import org.apache.flink.runtime.state.TaskStateManager;
import org.apache.flink.runtime.state.TaskStateManagerImpl;
import org.apache.flink.runtime.state.changelog.StateChangelogStorage;
import org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException;
import org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException;
import org.apache.flink.runtime.taskexecutor.exceptions.SlotOccupiedException;
import org.apache.flink.runtime.taskexecutor.exceptions.TaskException;
import org.apache.flink.runtime.taskexecutor.exceptions.TaskManagerException;
import org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException;
import org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder;
import org.apache.flink.runtime.taskexecutor.rpc.RpcGlobalAggregateManager;
import org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider;
import org.apache.flink.runtime.taskexecutor.rpc.RpcKvStateRegistryListener;
import org.apache.flink.runtime.taskexecutor.rpc.RpcPartitionStateChecker;
import org.apache.flink.runtime.taskexecutor.rpc.RpcTaskOperatorEventGateway;
import org.apache.flink.runtime.taskexecutor.slot.SlotActions;
import org.apache.flink.runtime.taskexecutor.slot.SlotAllocationSnapshot;
import org.apache.flink.runtime.taskexecutor.slot.SlotAllocationSnapshotPersistenceService;
import org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException;
import org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException;
import org.apache.flink.runtime.taskexecutor.slot.SlotOffer;
import org.apache.flink.runtime.taskexecutor.slot.TaskSlot;
import org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable;
import org.apache.flink.runtime.taskmanager.CheckpointResponder;
import org.apache.flink.runtime.taskmanager.Task;
import org.apache.flink.runtime.taskmanager.TaskExecutionState;
import org.apache.flink.runtime.taskmanager.TaskManagerActions;
import org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation;
import org.apache.flink.runtime.webmonitor.threadinfo.ThreadInfoSamplesRequest;
import org.apache.flink.types.SerializableOptional;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkException;
import org.apache.flink.util.FlinkExpectedException;
import org.apache.flink.util.OptionalConsumer;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.SerializedValue;
import org.apache.flink.util.StringUtils;
import org.apache.flink.util.concurrent.ExecutorThreadFactory;
import org.apache.flink.util.concurrent.FutureUtils;

import org.apache.flink.shaded.guava30.com.google.common.collect.ImmutableList;
import org.apache.flink.shaded.guava30.com.google.common.collect.Sets;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetSocketAddress;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeoutException;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * TaskExecutor implementation. The task executor is responsible for the execution of multiple
 * {@link Task}.
 */
public class TaskExecutor extends RpcEndpoint implements TaskExecutorGateway {

    public static final String TASK_MANAGER_NAME = "taskmanager";

    /** The access to the leader election and retrieval services. */
    private final HighAvailabilityServices haServices;

    private final TaskManagerServices taskExecutorServices;

    /** The task manager configuration. */
    private final TaskManagerConfiguration taskManagerConfiguration;

    /** The fatal error handler to use in case of a fatal error. */
    private final FatalErrorHandler fatalErrorHandler;

    private final TaskExecutorBlobService taskExecutorBlobService;

    private final LibraryCacheManager libraryCacheManager;

    /** The address to metric query service on this Task Manager. */
    @Nullable private final String metricQueryServiceAddress;

    // --------- TaskManager services --------

    /** The connection information of this task manager. */
    private final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation;

    private final TaskManagerMetricGroup taskManagerMetricGroup;

    /** The state manager for this task, providing state managers per slot. */
    private final TaskExecutorLocalStateStoresManager localStateStoresManager;

    /** The changelog manager for this task, providing changelog storage per job. */
    private final TaskExecutorStateChangelogStoragesManager changelogStoragesManager;

    /**
     * The channel state executor factory manager for this task, providing channel state executor
     * factory per job.
     */
    private final TaskExecutorChannelStateExecutorFactoryManager channelStateExecutorFactoryManager;

    /** Information provider for external resources. */
    private final ExternalResourceInfoProvider externalResourceInfoProvider;

    /** The network component in the task manager. */
    private final ShuffleEnvironment shuffleEnvironment;

    /** The kvState registration service in the task manager. */
    private final KvStateService kvStateService;

    private final Executor ioExecutor;

    /** {@link MemoryManager} shared across all tasks. */
    private final SharedResources sharedResources;

    // --------- task slot allocation table -----------

    private final TaskSlotTable taskSlotTable;

    private final Map currentSlotOfferPerJob = new HashMap<>();

    private final JobTable jobTable;

    private final JobLeaderService jobLeaderService;

    private final LeaderRetrievalService resourceManagerLeaderRetriever;

    private final SlotAllocationSnapshotPersistenceService slotAllocationSnapshotPersistenceService;

    // ------------------------------------------------------------------------

    private final HardwareDescription hardwareDescription;

    private final TaskExecutorMemoryConfiguration memoryConfiguration;

    private FileCache fileCache;

    /** The heartbeat manager for job manager in the task manager. */
    private final HeartbeatManager
            jobManagerHeartbeatManager;

    /** The heartbeat manager for resource manager in the task manager. */
    private final HeartbeatManager
            resourceManagerHeartbeatManager;

    private final TaskExecutorPartitionTracker partitionTracker;

    private final DelegationTokenReceiverRepository delegationTokenReceiverRepository;

    // --------- resource manager --------

    @Nullable private ResourceManagerAddress resourceManagerAddress;

    @Nullable private EstablishedResourceManagerConnection establishedResourceManagerConnection;

    @Nullable private TaskExecutorToResourceManagerConnection resourceManagerConnection;

    @Nullable private UUID currentRegistrationTimeoutId;

    private final Map>>
            taskResultPartitionCleanupFuturesPerJob = new HashMap<>(8);

    private final ThreadInfoSampleService threadInfoSampleService;

    public TaskExecutor(
            RpcService rpcService,
            TaskManagerConfiguration taskManagerConfiguration,
            HighAvailabilityServices haServices,
            TaskManagerServices taskExecutorServices,
            ExternalResourceInfoProvider externalResourceInfoProvider,
            HeartbeatServices heartbeatServices,
            TaskManagerMetricGroup taskManagerMetricGroup,
            @Nullable String metricQueryServiceAddress,
            TaskExecutorBlobService taskExecutorBlobService,
            FatalErrorHandler fatalErrorHandler,
            TaskExecutorPartitionTracker partitionTracker,
            DelegationTokenReceiverRepository delegationTokenReceiverRepository) {

        super(rpcService, RpcServiceUtils.createRandomName(TASK_MANAGER_NAME));

        checkArgument(
                taskManagerConfiguration.getNumberSlots() > 0,
                "The number of slots has to be larger than 0.");

        this.taskManagerConfiguration = checkNotNull(taskManagerConfiguration);
        this.taskExecutorServices = checkNotNull(taskExecutorServices);
        this.haServices = checkNotNull(haServices);
        this.fatalErrorHandler = checkNotNull(fatalErrorHandler);
        this.partitionTracker = partitionTracker;
        this.delegationTokenReceiverRepository = checkNotNull(delegationTokenReceiverRepository);
        this.taskManagerMetricGroup = checkNotNull(taskManagerMetricGroup);
        this.taskExecutorBlobService = checkNotNull(taskExecutorBlobService);
        this.metricQueryServiceAddress = metricQueryServiceAddress;
        this.externalResourceInfoProvider = checkNotNull(externalResourceInfoProvider);

        this.libraryCacheManager = taskExecutorServices.getLibraryCacheManager();
        this.taskSlotTable = taskExecutorServices.getTaskSlotTable();
        this.jobTable = taskExecutorServices.getJobTable();
        this.jobLeaderService = taskExecutorServices.getJobLeaderService();
        this.unresolvedTaskManagerLocation =
                taskExecutorServices.getUnresolvedTaskManagerLocation();
        this.localStateStoresManager = taskExecutorServices.getTaskManagerStateStore();
        this.changelogStoragesManager = taskExecutorServices.getTaskManagerChangelogManager();
        this.channelStateExecutorFactoryManager =
                taskExecutorServices.getTaskManagerChannelStateManager();
        this.shuffleEnvironment = taskExecutorServices.getShuffleEnvironment();
        this.kvStateService = taskExecutorServices.getKvStateService();
        this.ioExecutor = taskExecutorServices.getIOExecutor();
        this.resourceManagerLeaderRetriever = haServices.getResourceManagerLeaderRetriever();

        this.hardwareDescription =
                HardwareDescription.extractFromSystem(taskExecutorServices.getManagedMemorySize());
        this.memoryConfiguration =
                TaskExecutorMemoryConfiguration.create(taskManagerConfiguration.getConfiguration());

        this.resourceManagerAddress = null;
        this.resourceManagerConnection = null;
        this.currentRegistrationTimeoutId = null;

        final ResourceID resourceId =
                taskExecutorServices.getUnresolvedTaskManagerLocation().getResourceID();
        this.jobManagerHeartbeatManager =
                createJobManagerHeartbeatManager(heartbeatServices, resourceId);
        this.resourceManagerHeartbeatManager =
                createResourceManagerHeartbeatManager(heartbeatServices, resourceId);

        ExecutorThreadFactory sampleThreadFactory =
                new ExecutorThreadFactory.Builder()
                        .setPoolName("flink-thread-info-sampler")
                        .build();
        ScheduledExecutorService sampleExecutor =
                Executors.newSingleThreadScheduledExecutor(sampleThreadFactory);
        this.threadInfoSampleService = new ThreadInfoSampleService(sampleExecutor);

        this.slotAllocationSnapshotPersistenceService =
                taskExecutorServices.getSlotAllocationSnapshotPersistenceService();

        this.sharedResources = taskExecutorServices.getSharedResources();
    }

    private HeartbeatManager
            createResourceManagerHeartbeatManager(
                    HeartbeatServices heartbeatServices, ResourceID resourceId) {
        return heartbeatServices.createHeartbeatManager(
                resourceId, new ResourceManagerHeartbeatListener(), getMainThreadExecutor(), log);
    }

    private HeartbeatManager
            createJobManagerHeartbeatManager(
                    HeartbeatServices heartbeatServices, ResourceID resourceId) {
        return heartbeatServices.createHeartbeatManager(
                resourceId, new JobManagerHeartbeatListener(), getMainThreadExecutor(), log);
    }

    @Override
    public CompletableFuture canBeReleased() {
        return CompletableFuture.completedFuture(
                shuffleEnvironment.getPartitionsOccupyingLocalResources().isEmpty());
    }

    @Override
    public CompletableFuture> requestLogList(Time timeout) {
        return CompletableFuture.supplyAsync(
                () -> {
                    final String logDir = taskManagerConfiguration.getTaskManagerLogDir();
                    if (logDir != null) {
                        final File[] logFiles = new File(logDir).listFiles();

                        if (logFiles == null) {
                            throw new CompletionException(
                                    new FlinkException(
                                            String.format(
                                                    "There isn't a log file in TaskExecutor’s log dir %s.",
                                                    logDir)));
                        }

                        return Arrays.stream(logFiles)
                                .filter(File::isFile)
                                .map(
                                        logFile ->
                                                new LogInfo(
                                                        logFile.getName(),
                                                        logFile.length(),
                                                        logFile.lastModified()))
                                .collect(Collectors.toList());
                    }
                    return Collections.emptyList();
                },
                ioExecutor);
    }

    // ------------------------------------------------------------------------
    //  Life cycle
    // ------------------------------------------------------------------------

    @Override
    public void onStart() throws Exception {
        try {
            startTaskExecutorServices();
        } catch (Throwable t) {
            final TaskManagerException exception =
                    new TaskManagerException(
                            String.format("Could not start the TaskExecutor %s", getAddress()), t);
            onFatalError(exception);
            throw exception;
        }

        startRegistrationTimeout();
    }

    private void startTaskExecutorServices() throws Exception {
        try {
            // start by connecting to the ResourceManager
            resourceManagerLeaderRetriever.start(new ResourceManagerLeaderListener());

            // tell the task slot table who's responsible for the task slot actions
            taskSlotTable.start(new SlotActionsImpl(), getMainThreadExecutor());

            // start the job leader service
            jobLeaderService.start(
                    getAddress(), getRpcService(), haServices, new JobLeaderListenerImpl());

            fileCache =
                    new FileCache(
                            taskManagerConfiguration.getTmpDirectories(),
                            taskExecutorBlobService.getPermanentBlobService());

            tryLoadLocalAllocationSnapshots();
        } catch (Exception e) {
            handleStartTaskExecutorServicesException(e);
        }
    }

    private void handleStartTaskExecutorServicesException(Exception e) throws Exception {
        try {
            stopTaskExecutorServices();
        } catch (Exception inner) {
            e.addSuppressed(inner);
        }

        throw e;
    }

    /** Called to shut down the TaskManager. The method closes all TaskManager services. */
    @Override
    public CompletableFuture onStop() {
        log.info("Stopping TaskExecutor {}.", getAddress());

        Throwable jobManagerDisconnectThrowable = null;

        FlinkExpectedException cause =
                new FlinkExpectedException("The TaskExecutor is shutting down.");

        closeResourceManagerConnection(cause);

        for (JobTable.Job job : jobTable.getJobs()) {
            try {
                closeJob(job, cause);
            } catch (Throwable t) {
                jobManagerDisconnectThrowable =
                        ExceptionUtils.firstOrSuppressed(t, jobManagerDisconnectThrowable);
            }
        }

        changelogStoragesManager.shutdown();
        channelStateExecutorFactoryManager.shutdown();

        Preconditions.checkState(jobTable.isEmpty());

        final Throwable throwableBeforeTasksCompletion = jobManagerDisconnectThrowable;

        return FutureUtils.runAfterwards(taskSlotTable.closeAsync(), this::stopTaskExecutorServices)
                .handle(
                        (ignored, throwable) -> {
                            handleOnStopException(throwableBeforeTasksCompletion, throwable);
                            return null;
                        });
    }

    private void handleOnStopException(
            Throwable throwableBeforeTasksCompletion, Throwable throwableAfterTasksCompletion) {
        final Throwable throwable;

        if (throwableBeforeTasksCompletion != null) {
            throwable =
                    ExceptionUtils.firstOrSuppressed(
                            throwableBeforeTasksCompletion, throwableAfterTasksCompletion);
        } else {
            throwable = throwableAfterTasksCompletion;
        }

        if (throwable != null) {
            throw new CompletionException(
                    new FlinkException("Error while shutting the TaskExecutor down.", throwable));
        } else {
            log.info("Stopped TaskExecutor {}.", getAddress());
        }
    }

    private void stopTaskExecutorServices() throws Exception {
        Exception exception = null;

        try {
            threadInfoSampleService.close();
        } catch (Exception e) {
            exception = ExceptionUtils.firstOrSuppressed(e, exception);
        }

        try {
            jobLeaderService.stop();
        } catch (Exception e) {
            exception = ExceptionUtils.firstOrSuppressed(e, exception);
        }

        try {
            resourceManagerLeaderRetriever.stop();
        } catch (Exception e) {
            exception = ExceptionUtils.firstOrSuppressed(e, exception);
        }

        try {
            taskExecutorServices.shutDown();
        } catch (Exception e) {
            exception = ExceptionUtils.firstOrSuppressed(e, exception);
        }

        try {
            fileCache.shutdown();
        } catch (Exception e) {
            exception = ExceptionUtils.firstOrSuppressed(e, exception);
        }

        // it will call close() recursively from the parent to children
        taskManagerMetricGroup.close();

        ExceptionUtils.tryRethrowException(exception);
    }

    // ======================================================================
    //  RPC methods
    // ======================================================================

    @Override
    public CompletableFuture requestThreadInfoSamples(
            final Collection taskExecutionAttemptIds,
            final ThreadInfoSamplesRequest requestParams,
            final Time timeout) {

        final Collection tasks = new ArrayList<>();
        for (ExecutionAttemptID executionAttemptId : taskExecutionAttemptIds) {
            final Task task = taskSlotTable.getTask(executionAttemptId);
            if (task == null) {
                log.warn(
                        String.format(
                                "Cannot sample task %s. "
                                        + "Task is not known to the task manager.",
                                executionAttemptId));
            } else {
                tasks.add(task);
            }
        }

        Map sampleableTasks =
                tasks.stream()
                        .collect(
                                Collectors.toMap(
                                        task -> task.getExecutingThread().getId(),
                                        Task::getExecutionId));

        final CompletableFuture>>
                stackTracesFuture =
                        threadInfoSampleService.requestThreadInfoSamples(
                                sampleableTasks, requestParams);

        return stackTracesFuture.thenApply(TaskThreadInfoResponse::new);
    }

    // ----------------------------------------------------------------------
    // Task lifecycle RPCs
    // ----------------------------------------------------------------------

    @Override
    public CompletableFuture submitTask(
            TaskDeploymentDescriptor tdd, JobMasterId jobMasterId, Time timeout) {

        try {
            final JobID jobId = tdd.getJobId();
            final ExecutionAttemptID executionAttemptID = tdd.getExecutionAttemptId();

            final JobTable.Connection jobManagerConnection =
                    jobTable.getConnection(jobId)
                            .orElseThrow(
                                    () -> {
                                        final String message =
                                                "Could not submit task because there is no JobManager "
                                                        + "associated for the job "
                                                        + jobId
                                                        + '.';

                                        log.debug(message);
                                        return new TaskSubmissionException(message);
                                    });

            if (!Objects.equals(jobManagerConnection.getJobMasterId(), jobMasterId)) {
                final String message =
                        "Rejecting the task submission because the job manager leader id "
                                + jobMasterId
                                + " does not match the expected job manager leader id "
                                + jobManagerConnection.getJobMasterId()
                                + '.';

                log.debug(message);
                throw new TaskSubmissionException(message);
            }

            if (!taskSlotTable.tryMarkSlotActive(jobId, tdd.getAllocationId())) {
                final String message =
                        "No task slot allocated for job ID "
                                + jobId
                                + " and allocation ID "
                                + tdd.getAllocationId()
                                + '.';
                log.debug(message);
                throw new TaskSubmissionException(message);
            }

            // re-integrate offloaded data:
            try {
                tdd.loadBigData(taskExecutorBlobService.getPermanentBlobService());
            } catch (IOException | ClassNotFoundException e) {
                throw new TaskSubmissionException(
                        "Could not re-integrate offloaded TaskDeploymentDescriptor data.", e);
            }

            // deserialize the pre-serialized information
            final JobInformation jobInformation;
            final TaskInformation taskInformation;
            try {
                jobInformation =
                        tdd.getSerializedJobInformation()
                                .deserializeValue(getClass().getClassLoader());
                taskInformation =
                        tdd.getSerializedTaskInformation()
                                .deserializeValue(getClass().getClassLoader());
            } catch (IOException | ClassNotFoundException e) {
                throw new TaskSubmissionException(
                        "Could not deserialize the job or task information.", e);
            }

            if (!jobId.equals(jobInformation.getJobId())) {
                throw new TaskSubmissionException(
                        "Inconsistent job ID information inside TaskDeploymentDescriptor ("
                                + tdd.getJobId()
                                + " vs. "
                                + jobInformation.getJobId()
                                + ")");
            }

            TaskManagerJobMetricGroup jobGroup =
                    taskManagerMetricGroup.addJob(
                            jobInformation.getJobId(), jobInformation.getJobName());

            // note that a pre-existing job group can NOT be closed concurrently - this is done by
            // the same TM thread in removeJobMetricsGroup
            TaskMetricGroup taskMetricGroup =
                    jobGroup.addTask(tdd.getExecutionAttemptId(), taskInformation.getTaskName());

            InputSplitProvider inputSplitProvider =
                    new RpcInputSplitProvider(
                            jobManagerConnection.getJobManagerGateway(),
                            taskInformation.getJobVertexId(),
                            tdd.getExecutionAttemptId(),
                            taskManagerConfiguration.getRpcTimeout());

            final TaskOperatorEventGateway taskOperatorEventGateway =
                    new RpcTaskOperatorEventGateway(
                            jobManagerConnection.getJobManagerGateway(),
                            executionAttemptID,
                            (t) -> runAsync(() -> failTask(executionAttemptID, t)));

            TaskManagerActions taskManagerActions = jobManagerConnection.getTaskManagerActions();
            CheckpointResponder checkpointResponder = jobManagerConnection.getCheckpointResponder();
            GlobalAggregateManager aggregateManager =
                    jobManagerConnection.getGlobalAggregateManager();

            LibraryCacheManager.ClassLoaderHandle classLoaderHandle =
                    jobManagerConnection.getClassLoaderHandle();
            PartitionProducerStateChecker partitionStateChecker =
                    jobManagerConnection.getPartitionStateChecker();

            final TaskLocalStateStore localStateStore =
                    localStateStoresManager.localStateStoreForSubtask(
                            jobId,
                            tdd.getAllocationId(),
                            taskInformation.getJobVertexId(),
                            tdd.getSubtaskIndex(),
                            taskManagerConfiguration.getConfiguration(),
                            jobInformation.getJobConfiguration());

            // TODO: Pass config value from user program and do overriding here.
            final StateChangelogStorage changelogStorage;
            try {
                changelogStorage =
                        changelogStoragesManager.stateChangelogStorageForJob(
                                jobId,
                                taskManagerConfiguration.getConfiguration(),
                                jobGroup,
                                localStateStore.getLocalRecoveryConfig());
            } catch (IOException e) {
                throw new TaskSubmissionException(e);
            }

            final JobManagerTaskRestore taskRestore = tdd.getTaskRestore();

            final TaskStateManager taskStateManager =
                    new TaskStateManagerImpl(
                            jobId,
                            tdd.getExecutionAttemptId(),
                            localStateStore,
                            changelogStorage,
                            changelogStoragesManager,
                            taskRestore,
                            checkpointResponder);

            MemoryManager memoryManager;
            try {
                memoryManager = taskSlotTable.getTaskMemoryManager(tdd.getAllocationId());
            } catch (SlotNotFoundException e) {
                throw new TaskSubmissionException("Could not submit task.", e);
            }

            Task task =
                    new Task(
                            jobInformation,
                            taskInformation,
                            tdd.getExecutionAttemptId(),
                            tdd.getAllocationId(),
                            tdd.getProducedPartitions(),
                            tdd.getInputGates(),
                            memoryManager,
                            sharedResources,
                            taskExecutorServices.getIOManager(),
                            taskExecutorServices.getShuffleEnvironment(),
                            taskExecutorServices.getKvStateService(),
                            taskExecutorServices.getBroadcastVariableManager(),
                            taskExecutorServices.getTaskEventDispatcher(),
                            externalResourceInfoProvider,
                            taskStateManager,
                            taskManagerActions,
                            inputSplitProvider,
                            checkpointResponder,
                            taskOperatorEventGateway,
                            aggregateManager,
                            classLoaderHandle,
                            fileCache,
                            taskManagerConfiguration,
                            taskMetricGroup,
                            partitionStateChecker,
                            getRpcService().getScheduledExecutor(),
                            channelStateExecutorFactoryManager.getOrCreateExecutorFactory(jobId));

            taskMetricGroup.gauge(MetricNames.IS_BACK_PRESSURED, task::isBackPressured);

            log.info(
                    "Received task {} ({}), deploy into slot with allocation id {}.",
                    task.getTaskInfo().getTaskNameWithSubtasks(),
                    tdd.getExecutionAttemptId(),
                    tdd.getAllocationId());

            boolean taskAdded;

            try {
                taskAdded = taskSlotTable.addTask(task);
            } catch (SlotNotFoundException | SlotNotActiveException e) {
                throw new TaskSubmissionException("Could not submit task.", e);
            }

            if (taskAdded) {
                task.startTaskThread();

                setupResultPartitionBookkeeping(
                        tdd.getJobId(), tdd.getProducedPartitions(), task.getTerminationFuture());
                return CompletableFuture.completedFuture(Acknowledge.get());
            } else {
                final String message =
                        "TaskManager already contains a task for id " + task.getExecutionId() + '.';

                log.debug(message);
                throw new TaskSubmissionException(message);
            }
        } catch (TaskSubmissionException e) {
            return FutureUtils.completedExceptionally(e);
        }
    }

    private void setupResultPartitionBookkeeping(
            JobID jobId,
            Collection producedResultPartitions,
            CompletableFuture terminationFuture) {
        final Set partitionsRequiringRelease =
                filterPartitionsRequiringRelease(producedResultPartitions)
                        .peek(
                                rpdd ->
                                        partitionTracker.startTrackingPartition(
                                                jobId, TaskExecutorPartitionInfo.from(rpdd)))
                        .map(ResultPartitionDeploymentDescriptor::getShuffleDescriptor)
                        .map(ShuffleDescriptor::getResultPartitionID)
                        .collect(Collectors.toSet());

        final CompletableFuture taskTerminationWithResourceCleanupFuture =
                terminationFuture.thenApplyAsync(
                        executionState -> {
                            if (executionState != ExecutionState.FINISHED) {
                                partitionTracker.stopTrackingPartitions(partitionsRequiringRelease);
                            }
                            return executionState;
                        },
                        getMainThreadExecutor());

        taskResultPartitionCleanupFuturesPerJob.compute(
                jobId,
                (ignored, completableFutures) -> {
                    if (completableFutures == null) {
                        completableFutures = new ArrayList<>(4);
                    }

                    completableFutures.add(taskTerminationWithResourceCleanupFuture);
                    return completableFutures;
                });
    }

    private Stream filterPartitionsRequiringRelease(
            Collection producedResultPartitions) {
        return producedResultPartitions.stream()
                // only releaseByScheduler partitions require explicit release call
                .filter(d -> d.getPartitionType().isReleaseByScheduler())
                // partitions without local resources don't store anything on the TaskExecutor
                .filter(d -> d.getShuffleDescriptor().storesLocalResourcesOn().isPresent());
    }

    @Override
    public CompletableFuture cancelTask(
            ExecutionAttemptID executionAttemptID, Time timeout) {
        final Task task = taskSlotTable.getTask(executionAttemptID);

        if (task != null) {
            try {
                task.cancelExecution();
                return CompletableFuture.completedFuture(Acknowledge.get());
            } catch (Throwable t) {
                return FutureUtils.completedExceptionally(
                        new TaskException(
                                "Cannot cancel task for execution " + executionAttemptID + '.', t));
            }
        } else {
            final String message =
                    "Cannot find task to stop for execution " + executionAttemptID + '.';

            log.debug(message);
            return FutureUtils.completedExceptionally(new TaskException(message));
        }
    }

    // ----------------------------------------------------------------------
    // Partition lifecycle RPCs
    // ----------------------------------------------------------------------

    @Override
    public CompletableFuture updatePartitions(
            final ExecutionAttemptID executionAttemptID,
            Iterable partitionInfos,
            Time timeout) {
        final Task task = taskSlotTable.getTask(executionAttemptID);

        if (task != null) {
            for (final PartitionInfo partitionInfo : partitionInfos) {
                // Run asynchronously because it might be blocking
                FutureUtils.assertNoException(
                        CompletableFuture.runAsync(
                                () -> {
                                    try {
                                        if (!shuffleEnvironment.updatePartitionInfo(
                                                executionAttemptID, partitionInfo)) {
                                            log.debug(
                                                    "Discard update for input gate partition {} of result {} in task {}. "
                                                            + "The partition is no longer available.",
                                                    partitionInfo
                                                            .getShuffleDescriptor()
                                                            .getResultPartitionID(),
                                                    partitionInfo.getIntermediateDataSetID(),
                                                    executionAttemptID);
                                        }
                                    } catch (IOException | InterruptedException e) {
                                        log.error(
                                                "Could not update input data location for task {}. Trying to fail task.",
                                                task.getTaskInfo().getTaskName(),
                                                e);
                                        task.failExternally(e);
                                    }
                                },
                                getRpcService().getScheduledExecutor()));
            }
            return CompletableFuture.completedFuture(Acknowledge.get());
        } else {
            log.debug(
                    "Discard update for input partitions of task {}. Task is no longer running.",
                    executionAttemptID);
            return CompletableFuture.completedFuture(Acknowledge.get());
        }
    }

    @Override
    public void releasePartitions(JobID jobId, Set partitionIds) {
        try {
            partitionTracker.stopTrackingAndReleaseJobPartitions(partitionIds);
            closeJobManagerConnectionIfNoAllocatedResources(jobId);
        } catch (Throwable t) {
            onFatalError(t);
        }
    }

    @Override
    public CompletableFuture promotePartitions(
            JobID jobId, Set partitionIds) {
        CompletableFuture future = new CompletableFuture<>();
        try {
            partitionTracker.promoteJobPartitions(partitionIds);
            if (establishedResourceManagerConnection != null) {
                establishedResourceManagerConnection
                        .getResourceManagerGateway()
                        .reportClusterPartitions(
                                getResourceID(), partitionTracker.createClusterPartitionReport())
                        .thenAccept(ignore -> future.complete(Acknowledge.get()));
            } else {
                future.completeExceptionally(
                        new RuntimeException(
                                "Task executor is not connecting to ResourceManager. "
                                        + "Fail to report cluster partition to ResourceManager"));
            }
            closeJobManagerConnectionIfNoAllocatedResources(jobId);
        } catch (Throwable t) {
            future.completeExceptionally(t);
            onFatalError(t);
        }

        return future;
    }

    @Override
    public CompletableFuture releaseClusterPartitions(
            Collection dataSetsToRelease, Time timeout) {
        partitionTracker.stopTrackingAndReleaseClusterPartitions(dataSetsToRelease);
        return CompletableFuture.completedFuture(Acknowledge.get());
    }

    // ----------------------------------------------------------------------
    // Heartbeat RPC
    // ----------------------------------------------------------------------

    @Override
    public CompletableFuture heartbeatFromJobManager(
            ResourceID resourceID, AllocatedSlotReport allocatedSlotReport) {
        return jobManagerHeartbeatManager.requestHeartbeat(resourceID, allocatedSlotReport);
    }

    @Override
    public CompletableFuture heartbeatFromResourceManager(ResourceID resourceID) {
        return resourceManagerHeartbeatManager.requestHeartbeat(resourceID, null);
    }

    // ----------------------------------------------------------------------
    // Checkpointing RPCs
    // ----------------------------------------------------------------------

    @Override
    public CompletableFuture triggerCheckpoint(
            ExecutionAttemptID executionAttemptID,
            long checkpointId,
            long checkpointTimestamp,
            CheckpointOptions checkpointOptions) {
        log.debug(
                "Trigger checkpoint {}@{} for {}.",
                checkpointId,
                checkpointTimestamp,
                executionAttemptID);

        final Task task = taskSlotTable.getTask(executionAttemptID);

        if (task != null) {
            task.triggerCheckpointBarrier(checkpointId, checkpointTimestamp, checkpointOptions);

            return CompletableFuture.completedFuture(Acknowledge.get());
        } else {
            final String message =
                    "TaskManager received a checkpoint request for unknown task "
                            + executionAttemptID
                            + '.';

            log.debug(message);
            return FutureUtils.completedExceptionally(
                    new CheckpointException(
                            message, CheckpointFailureReason.TASK_CHECKPOINT_FAILURE));
        }
    }

    @Override
    public CompletableFuture confirmCheckpoint(
            ExecutionAttemptID executionAttemptID,
            long completedCheckpointId,
            long completedCheckpointTimestamp,
            long lastSubsumedCheckpointId) {
        log.debug(
                "Confirm completed checkpoint {}@{} and last subsumed checkpoint {} for {}.",
                completedCheckpointId,
                completedCheckpointTimestamp,
                lastSubsumedCheckpointId,
                executionAttemptID);

        final Task task = taskSlotTable.getTask(executionAttemptID);

        if (task != null) {
            task.notifyCheckpointComplete(completedCheckpointId);

            task.notifyCheckpointSubsumed(lastSubsumedCheckpointId);
            return CompletableFuture.completedFuture(Acknowledge.get());
        } else {
            final String message =
                    "TaskManager received a checkpoint confirmation for unknown task "
                            + executionAttemptID
                            + '.';

            log.debug(message);
            return FutureUtils.completedExceptionally(
                    new CheckpointException(
                            message,
                            CheckpointFailureReason.UNKNOWN_TASK_CHECKPOINT_NOTIFICATION_FAILURE));
        }
    }

    @Override
    public CompletableFuture abortCheckpoint(
            ExecutionAttemptID executionAttemptID,
            long checkpointId,
            long latestCompletedCheckpointId,
            long checkpointTimestamp) {
        log.debug(
                "Abort checkpoint {}@{} for {}.",
                checkpointId,
                checkpointTimestamp,
                executionAttemptID);

        final Task task = taskSlotTable.getTask(executionAttemptID);

        if (task != null) {
            task.notifyCheckpointAborted(checkpointId, latestCompletedCheckpointId);

            return CompletableFuture.completedFuture(Acknowledge.get());
        } else {
            final String message =
                    "TaskManager received an aborted checkpoint for unknown task "
                            + executionAttemptID
                            + '.';

            log.debug(message);
            return FutureUtils.completedExceptionally(
                    new CheckpointException(
                            message,
                            CheckpointFailureReason.UNKNOWN_TASK_CHECKPOINT_NOTIFICATION_FAILURE));
        }
    }

    // ----------------------------------------------------------------------
    // Slot allocation RPCs
    // ----------------------------------------------------------------------

    @Override
    public CompletableFuture requestSlot(
            final SlotID slotId,
            final JobID jobId,
            final AllocationID allocationId,
            final ResourceProfile resourceProfile,
            final String targetAddress,
            final ResourceManagerId resourceManagerId,
            final Time timeout) {
        // TODO: Filter invalid requests from the resource manager by using the
        // instance/registration Id

        log.info(
                "Receive slot request {} for job {} from resource manager with leader id {}.",
                allocationId,
                jobId,
                resourceManagerId);

        if (!isConnectedToResourceManager(resourceManagerId)) {
            final String message =
                    String.format(
                            "TaskManager is not connected to the resource manager %s.",
                            resourceManagerId);
            log.debug(message);
            return FutureUtils.completedExceptionally(new TaskManagerException(message));
        }

        tryPersistAllocationSnapshot(
                new SlotAllocationSnapshot(
                        slotId, jobId, targetAddress, allocationId, resourceProfile));

        try {
            final boolean isConnected =
                    allocateSlotForJob(jobId, slotId, allocationId, resourceProfile, targetAddress);

            if (isConnected) {
                offerSlotsToJobManager(jobId);
            }

            return CompletableFuture.completedFuture(Acknowledge.get());
        } catch (SlotAllocationException e) {
            log.debug("Could not allocate slot for allocation id {}.", allocationId, e);
            return FutureUtils.completedExceptionally(e);
        }
    }

    private boolean allocateSlotForJob(
            JobID jobId,
            SlotID slotId,
            AllocationID allocationId,
            ResourceProfile resourceProfile,
            String targetAddress)
            throws SlotAllocationException {
        allocateSlot(slotId, jobId, allocationId, resourceProfile);

        final JobTable.Job job;

        try {
            job =
                    jobTable.getOrCreateJob(
                            jobId, () -> registerNewJobAndCreateServices(jobId, targetAddress));
        } catch (Exception e) {
            // free the allocated slot
            try {
                taskSlotTable.freeSlot(allocationId);
            } catch (SlotNotFoundException slotNotFoundException) {
                // slot no longer existent, this should actually never happen, because we've
                // just allocated the slot. So let's fail hard in this case!
                onFatalError(slotNotFoundException);
            }

            // release local state under the allocation id.
            localStateStoresManager.releaseLocalStateForAllocationId(allocationId);

            // sanity check
            if (!taskSlotTable.isSlotFree(slotId.getSlotNumber())) {
                onFatalError(new Exception("Could not free slot " + slotId));
            }

            throw new SlotAllocationException("Could not create new job.", e);
        }

        return job.isConnected();
    }

    private TaskExecutorJobServices registerNewJobAndCreateServices(
            JobID jobId, String targetAddress) throws Exception {
        jobLeaderService.addJob(jobId, targetAddress);
        final JobPermanentBlobService permanentBlobService =
                taskExecutorBlobService.getPermanentBlobService();
        permanentBlobService.registerJob(jobId);

        return TaskExecutorJobServices.create(
                libraryCacheManager.registerClassLoaderLease(jobId),
                () -> permanentBlobService.releaseJob(jobId));
    }

    private void allocateSlot(
            SlotID slotId, JobID jobId, AllocationID allocationId, ResourceProfile resourceProfile)
            throws SlotAllocationException {
        if (taskSlotTable.isSlotFree(slotId.getSlotNumber())) {
            if (taskSlotTable.allocateSlot(
                    slotId.getSlotNumber(),
                    jobId,
                    allocationId,
                    resourceProfile,
                    taskManagerConfiguration.getSlotTimeout())) {
                log.info("Allocated slot for {}.", allocationId);
            } else {
                log.info("Could not allocate slot for {}.", allocationId);
                throw new SlotAllocationException("Could not allocate slot.");
            }
        } else if (!taskSlotTable.isAllocated(slotId.getSlotNumber(), jobId, allocationId)) {
            final String message =
                    "The slot " + slotId + " has already been allocated for a different job.";

            log.info(message);

            final AllocationID allocationID =
                    taskSlotTable.getCurrentAllocation(slotId.getSlotNumber());
            throw new SlotOccupiedException(
                    message, allocationID, taskSlotTable.getOwningJob(allocationID));
        }
    }

    @Override
    public CompletableFuture freeSlot(
            AllocationID allocationId, Throwable cause, Time timeout) {
        freeSlotInternal(allocationId, cause);
        return CompletableFuture.completedFuture(Acknowledge.get());
    }

    @Override
    public void freeInactiveSlots(JobID jobId, Time timeout) {
        log.debug("Freeing inactive slots for job {}.", jobId);

        // need a copy to prevent ConcurrentModificationExceptions
        final ImmutableList> inactiveSlots =
                ImmutableList.copyOf(taskSlotTable.getAllocatedSlots(jobId));
        for (TaskSlot slot : inactiveSlots) {
            freeSlotInternal(
                    slot.getAllocationId(),
                    new FlinkException("Slot was re-claimed by resource manager."));
        }
    }

    @Override
    public CompletableFuture requestFileUploadByType(
            FileType fileType, Time timeout) {
        final String filePath;
        switch (fileType) {
            case LOG:
                filePath = taskManagerConfiguration.getTaskManagerLogPath();
                break;
            case STDOUT:
                filePath = taskManagerConfiguration.getTaskManagerStdoutPath();
                break;
            default:
                filePath = null;
        }
        return requestFileUploadByFilePath(filePath, fileType.toString());
    }

    @Override
    public CompletableFuture requestFileUploadByName(
            String fileName, Time timeout) {
        final String filePath;
        final String logDir = taskManagerConfiguration.getTaskManagerLogDir();
        if (StringUtils.isNullOrWhitespaceOnly(logDir)
                || StringUtils.isNullOrWhitespaceOnly(fileName)) {
            filePath = null;
        } else {
            filePath = new File(logDir, new File(fileName).getName()).getPath();
        }
        return requestFileUploadByFilePath(filePath, fileName);
    }

    @Override
    public CompletableFuture> requestMetricQueryServiceAddress(
            Time timeout) {
        return CompletableFuture.completedFuture(
                SerializableOptional.ofNullable(metricQueryServiceAddress));
    }

    // ----------------------------------------------------------------------
    // Disconnection RPCs
    // ----------------------------------------------------------------------

    @Override
    public void disconnectJobManager(JobID jobId, Exception cause) {
        jobTable.getConnection(jobId)
                .ifPresent(
                        jobManagerConnection ->
                                disconnectAndTryReconnectToJobManager(jobManagerConnection, cause));
    }

    private void disconnectAndTryReconnectToJobManager(
            JobTable.Connection jobManagerConnection, Exception cause) {
        disconnectJobManagerConnection(jobManagerConnection, cause);
        jobLeaderService.reconnect(jobManagerConnection.getJobId());
    }

    @Override
    public void disconnectResourceManager(Exception cause) {
        if (isRunning()) {
            reconnectToResourceManager(cause);
        }
    }

    // ----------------------------------------------------------------------
    // Other RPCs
    // ----------------------------------------------------------------------

    @Override
    public CompletableFuture sendOperatorEventToTask(
            ExecutionAttemptID executionAttemptID,
            OperatorID operatorId,
            SerializedValue evt) {

        log.debug("Operator event for {} - {}", executionAttemptID, operatorId);

        final Task task = taskSlotTable.getTask(executionAttemptID);
        if (task == null) {
            return FutureUtils.completedExceptionally(
                    new TaskNotRunningException(
                            "Task " + executionAttemptID + " not running on TaskManager"));
        }

        try {
            task.deliverOperatorEvent(operatorId, evt);
            return CompletableFuture.completedFuture(Acknowledge.get());
        } catch (Throwable t) {
            ExceptionUtils.rethrowIfFatalError(t);
            return FutureUtils.completedExceptionally(t);
        }
    }

    @Override
    public CompletableFuture requestThreadDump(Time timeout) {
        int stacktraceMaxDepth =
                taskManagerConfiguration
                        .getConfiguration()
                        .get(ClusterOptions.THREAD_DUMP_STACKTRACE_MAX_DEPTH);
        return CompletableFuture.completedFuture(ThreadDumpInfo.dumpAndCreate(stacktraceMaxDepth));
    }

    @Override
    public CompletableFuture updateDelegationTokens(
            ResourceManagerId resourceManagerId, byte[] tokens) {
        log.info(
                "Receive update delegation tokens from resource manager with leader id {}.",
                resourceManagerId);

        if (!isConnectedToResourceManager(resourceManagerId)) {
            final String message =
                    String.format(
                            "TaskManager is not connected to the resource manager %s.",
                            resourceManagerId);
            log.debug(message);
            return FutureUtils.completedExceptionally(new TaskManagerException(message));
        }

        try {
            delegationTokenReceiverRepository.onNewTokensObtained(tokens);
            return CompletableFuture.completedFuture(Acknowledge.get());
        } catch (Throwable t) {
            log.error("Could not update delegation tokens.", t);
            ExceptionUtils.rethrowIfFatalError(t);
            return FutureUtils.completedExceptionally(t);
        }
    }

    // ------------------------------------------------------------------------
    //  Internal resource manager connection methods
    // ------------------------------------------------------------------------

    private void notifyOfNewResourceManagerLeader(
            String newLeaderAddress, ResourceManagerId newResourceManagerId) {
        resourceManagerAddress =
                createResourceManagerAddress(newLeaderAddress, newResourceManagerId);
        reconnectToResourceManager(
                new FlinkException(
                        String.format(
                                "ResourceManager leader changed to new address %s",
                                resourceManagerAddress)));
    }

    @Nullable
    private ResourceManagerAddress createResourceManagerAddress(
            @Nullable String newLeaderAddress, @Nullable ResourceManagerId newResourceManagerId) {
        if (newLeaderAddress == null) {
            return null;
        } else {
            assert (newResourceManagerId != null);
            return new ResourceManagerAddress(newLeaderAddress, newResourceManagerId);
        }
    }

    private void reconnectToResourceManager(Exception cause) {
        closeResourceManagerConnection(cause);
        startRegistrationTimeout();
        tryConnectToResourceManager();
    }

    private void tryConnectToResourceManager() {
        if (resourceManagerAddress != null) {
            connectToResourceManager();
        }
    }

    private void connectToResourceManager() {
        assert (resourceManagerAddress != null);
        assert (establishedResourceManagerConnection == null);
        assert (resourceManagerConnection == null);

        log.info("Connecting to ResourceManager {}.", resourceManagerAddress);

        final TaskExecutorRegistration taskExecutorRegistration =
                new TaskExecutorRegistration(
                        getAddress(),
                        getResourceID(),
                        unresolvedTaskManagerLocation.getDataPort(),
                        JMXService.getPort().orElse(-1),
                        hardwareDescription,
                        memoryConfiguration,
                        taskManagerConfiguration.getDefaultSlotResourceProfile(),
                        taskManagerConfiguration.getTotalResourceProfile(),
                        unresolvedTaskManagerLocation.getNodeId());

        resourceManagerConnection =
                new TaskExecutorToResourceManagerConnection(
                        log,
                        getRpcService(),
                        taskManagerConfiguration.getRetryingRegistrationConfiguration(),
                        resourceManagerAddress.getAddress(),
                        resourceManagerAddress.getResourceManagerId(),
                        getMainThreadExecutor(),
                        new ResourceManagerRegistrationListener(),
                        taskExecutorRegistration);
        resourceManagerConnection.start();
    }

    private void establishResourceManagerConnection(
            ResourceManagerGateway resourceManagerGateway,
            ResourceID resourceManagerResourceId,
            InstanceID taskExecutorRegistrationId,
            ClusterInformation clusterInformation) {

        final CompletableFuture slotReportResponseFuture =
                resourceManagerGateway.sendSlotReport(
                        getResourceID(),
                        taskExecutorRegistrationId,
                        taskSlotTable.createSlotReport(getResourceID()),
                        taskManagerConfiguration.getRpcTimeout());

        slotReportResponseFuture.whenCompleteAsync(
                (acknowledge, throwable) -> {
                    if (throwable != null) {
                        reconnectToResourceManager(
                                new TaskManagerException(
                                        "Failed to send initial slot report to ResourceManager.",
                                        throwable));
                    }
                },
                getMainThreadExecutor());

        // monitor the resource manager as heartbeat target
        resourceManagerHeartbeatManager.monitorTarget(
                resourceManagerResourceId,
                new ResourceManagerHeartbeatReceiver(resourceManagerGateway));

        // set the propagated blob server address
        final InetSocketAddress blobServerAddress =
                new InetSocketAddress(
                        clusterInformation.getBlobServerHostname(),
                        clusterInformation.getBlobServerPort());

        taskExecutorBlobService.setBlobServerAddress(blobServerAddress);

        establishedResourceManagerConnection =
                new EstablishedResourceManagerConnection(
                        resourceManagerGateway,
                        resourceManagerResourceId,
                        taskExecutorRegistrationId);

        stopRegistrationTimeout();
    }

    private void closeResourceManagerConnection(Exception cause) {
        if (establishedResourceManagerConnection != null) {
            final ResourceID resourceManagerResourceId =
                    establishedResourceManagerConnection.getResourceManagerResourceId();

            log.info(
                    "Close ResourceManager connection {}.",
                    resourceManagerResourceId,
                    ExceptionUtils.returnExceptionIfUnexpected(cause.getCause()));
            ExceptionUtils.logExceptionIfExcepted(cause.getCause(), log);
            resourceManagerHeartbeatManager.unmonitorTarget(resourceManagerResourceId);

            ResourceManagerGateway resourceManagerGateway =
                    establishedResourceManagerConnection.getResourceManagerGateway();
            resourceManagerGateway.disconnectTaskManager(getResourceID(), cause);

            establishedResourceManagerConnection = null;

            partitionTracker.stopTrackingAndReleaseAllClusterPartitions();
        }

        if (resourceManagerConnection != null) {
            if (!resourceManagerConnection.isConnected()) {
                if (log.isDebugEnabled()) {
                    log.debug(
                            "Terminating registration attempts towards ResourceManager {}.",
                            resourceManagerConnection.getTargetAddress(),
                            cause);
                } else {
                    log.info(
                            "Terminating registration attempts towards ResourceManager {}.",
                            resourceManagerConnection.getTargetAddress());
                }
            }

            resourceManagerConnection.close();
            resourceManagerConnection = null;
        }
    }

    private void startRegistrationTimeout() {
        final Duration maxRegistrationDuration =
                taskManagerConfiguration.getMaxRegistrationDuration();

        if (maxRegistrationDuration != null) {
            final UUID newRegistrationTimeoutId = UUID.randomUUID();
            currentRegistrationTimeoutId = newRegistrationTimeoutId;
            scheduleRunAsync(
                    () -> registrationTimeout(newRegistrationTimeoutId), maxRegistrationDuration);
        }
    }

    private void stopRegistrationTimeout() {
        currentRegistrationTimeoutId = null;
    }

    private void registrationTimeout(@Nonnull UUID registrationTimeoutId) {
        if (registrationTimeoutId.equals(currentRegistrationTimeoutId)) {
            final Duration maxRegistrationDuration =
                    taskManagerConfiguration.getMaxRegistrationDuration();

            onFatalError(
                    new RegistrationTimeoutException(
                            String.format(
                                    "Could not register at the ResourceManager within the specified maximum "
                                            + "registration duration %s. This indicates a problem with this instance. Terminating now.",
                                    maxRegistrationDuration)));
        }
    }

    // ------------------------------------------------------------------------
    //  Internal job manager connection methods
    // ------------------------------------------------------------------------

    private void offerSlotsToJobManager(final JobID jobId) {
        jobTable.getConnection(jobId).ifPresent(this::internalOfferSlotsToJobManager);
    }

    private void internalOfferSlotsToJobManager(JobTable.Connection jobManagerConnection) {
        final JobID jobId = jobManagerConnection.getJobId();

        if (taskSlotTable.hasAllocatedSlots(jobId)) {
            log.info("Offer reserved slots to the leader of job {}.", jobId);

            final JobMasterGateway jobMasterGateway = jobManagerConnection.getJobManagerGateway();

            final Iterator> reservedSlotsIterator =
                    taskSlotTable.getAllocatedSlots(jobId);
            final JobMasterId jobMasterId = jobManagerConnection.getJobMasterId();

            final Collection reservedSlots = new HashSet<>(2);

            while (reservedSlotsIterator.hasNext()) {
                SlotOffer offer = reservedSlotsIterator.next().generateSlotOffer();
                reservedSlots.add(offer);
            }

            final UUID slotOfferId = UUID.randomUUID();
            currentSlotOfferPerJob.put(jobId, slotOfferId);

            CompletableFuture> acceptedSlotsFuture =
                    jobMasterGateway.offerSlots(
                            getResourceID(),
                            reservedSlots,
                            taskManagerConfiguration.getRpcTimeout());

            acceptedSlotsFuture.whenCompleteAsync(
                    handleAcceptedSlotOffers(
                            jobId, jobMasterGateway, jobMasterId, reservedSlots, slotOfferId),
                    getMainThreadExecutor());
        } else {
            log.debug("There are no unassigned slots for the job {}.", jobId);
        }
    }

    @Nonnull
    private BiConsumer, Throwable> handleAcceptedSlotOffers(
            JobID jobId,
            JobMasterGateway jobMasterGateway,
            JobMasterId jobMasterId,
            Collection offeredSlots,
            UUID offerId) {
        return (Iterable acceptedSlots, Throwable throwable) -> {
            // check if this is the latest offer
            if (!offerId.equals(currentSlotOfferPerJob.get(jobId))) {
                // If this offer is outdated then it can be safely ignored.
                // If the response for a given slot is identical in both offers (accepted/rejected),
                // then this is naturally the case since the end-result is the same.
                // If the responses differ, then there are 2 cases to consider:
                // 1) initially rejected, later accepted
                //   This can happen when the resource requirements of a job increases between
                //   offers.
                //   In this case the first response MUST be ignored, so that
                //   the slot can be properly activated when the second response arrives.
                // 2) initially accepted, later rejected
                //   This can happen when the resource requirements of a job decrease between
                //   offers.
                //   In this case the first response MAY be ignored, because the job no longer
                //   requires the slot (and already has initiated steps to free it) and we can thus
                //   assume that any in-flight task submissions are no longer relevant for the job
                //   execution.

                log.debug(
                        "Discard slot offer response since there is a newer offer for the job {}.",
                        jobId);
                return;
            }

            if (throwable != null) {
                if (throwable instanceof TimeoutException) {
                    log.info(
                            "Slot offering to JobManager did not finish in time. Retrying the slot offering.");
                    // We ran into a timeout. Try again.
                    offerSlotsToJobManager(jobId);
                } else {
                    log.warn(
                            "Slot offering to JobManager failed. Freeing the slots "
                                    + "and returning them to the ResourceManager.",
                            throwable);

                    // We encountered an exception. Free the slots and return them to the RM.
                    for (SlotOffer reservedSlot : offeredSlots) {
                        freeSlotInternal(reservedSlot.getAllocationId(), throwable);
                    }
                }
            } else {
                // check if the response is still valid
                if (isJobManagerConnectionValid(jobId, jobMasterId)) {
                    // mark accepted slots active
                    for (SlotOffer acceptedSlot : acceptedSlots) {
                        final AllocationID allocationId = acceptedSlot.getAllocationId();
                        try {
                            if (!taskSlotTable.markSlotActive(allocationId)) {
                                // the slot is either free or releasing at the moment
                                final String message =
                                        "Could not mark slot " + allocationId + " active.";
                                log.debug(message);
                                jobMasterGateway.failSlot(
                                        getResourceID(), allocationId, new FlinkException(message));
                            }
                        } catch (SlotNotFoundException e) {
                            final String message =
                                    "Could not mark slot " + allocationId + " active.";
                            jobMasterGateway.failSlot(
                                    getResourceID(), allocationId, new FlinkException(message));
                        }

                        offeredSlots.remove(acceptedSlot);
                    }

                    final Exception e = new Exception("The slot was rejected by the JobManager.");

                    for (SlotOffer rejectedSlot : offeredSlots) {
                        freeSlotInternal(rejectedSlot.getAllocationId(), e);
                    }
                } else {
                    // discard the response since there is a new leader for the job
                    log.debug(
                            "Discard slot offer response since there is a new leader "
                                    + "for the job {}.",
                            jobId);
                }
            }
        };
    }

    private void establishJobManagerConnection(
            JobTable.Job job,
            final JobMasterGateway jobMasterGateway,
            JMTMRegistrationSuccess registrationSuccess) {

        final JobID jobId = job.getJobId();
        final Optional connection = job.asConnection();

        if (connection.isPresent()) {
            JobTable.Connection oldJobManagerConnection = connection.get();

            if (Objects.equals(
                    oldJobManagerConnection.getJobMasterId(), jobMasterGateway.getFencingToken())) {
                // we already are connected to the given job manager
                log.debug(
                        "Ignore JobManager gained leadership message for {} because we are already connected to it.",
                        jobMasterGateway.getFencingToken());
                return;
            } else {
                disconnectJobManagerConnection(
                        oldJobManagerConnection,
                        new Exception("Found new job leader for job id " + jobId + '.'));
            }
        }

        log.info("Establish JobManager connection for job {}.", jobId);

        ResourceID jobManagerResourceID = registrationSuccess.getResourceID();

        final JobTable.Connection establishedConnection =
                associateWithJobManager(job, jobManagerResourceID, jobMasterGateway);

        // monitor the job manager as heartbeat target
        jobManagerHeartbeatManager.monitorTarget(
                jobManagerResourceID, new JobManagerHeartbeatReceiver(jobMasterGateway));

        internalOfferSlotsToJobManager(establishedConnection);
    }

    private void closeJob(JobTable.Job job, Exception cause) {
        job.asConnection()
                .ifPresent(
                        jobManagerConnection ->
                                disconnectJobManagerConnection(jobManagerConnection, cause));

        job.close();
    }

    private void disconnectJobManagerConnection(
            JobTable.Connection jobManagerConnection, Exception cause) {
        final JobID jobId = jobManagerConnection.getJobId();
        log.info(
                "Close JobManager connection for job {}.",
                jobId,
                ExceptionUtils.returnExceptionIfUnexpected(cause.getCause()));
        ExceptionUtils.logExceptionIfExcepted(cause.getCause(), log);

        // 1. fail tasks running under this JobID
        Iterator tasks = taskSlotTable.getTasks(jobId);

        final FlinkException failureCause =
                new FlinkException(
                        String.format("Disconnect from JobManager responsible for %s.", jobId),
                        cause);

        while (tasks.hasNext()) {
            tasks.next().failExternally(failureCause);
        }

        // 2. Move the active slots to state allocated (possible to time out again)
        Set activeSlotAllocationIDs =
                taskSlotTable.getActiveTaskSlotAllocationIdsPerJob(jobId);

        final FlinkException freeingCause =
                new FlinkException("Slot could not be marked inactive.");

        for (AllocationID activeSlotAllocationID : activeSlotAllocationIDs) {
            try {
                if (!taskSlotTable.markSlotInactive(
                        activeSlotAllocationID, taskManagerConfiguration.getSlotTimeout())) {
                    freeSlotInternal(activeSlotAllocationID, freeingCause);
                }
            } catch (SlotNotFoundException e) {
                log.debug("Could not mark the slot {} inactive.", activeSlotAllocationID, e);
            }
        }

        // 3. Disassociate from the JobManager
        try {
            jobManagerHeartbeatManager.unmonitorTarget(jobManagerConnection.getResourceId());
            disassociateFromJobManager(jobManagerConnection, cause);
        } catch (IOException e) {
            log.warn(
                    "Could not properly disassociate from JobManager {}.",
                    jobManagerConnection.getJobManagerGateway().getAddress(),
                    e);
        }

        jobManagerConnection.disconnect();
    }

    private JobTable.Connection associateWithJobManager(
            JobTable.Job job, ResourceID resourceID, JobMasterGateway jobMasterGateway) {
        checkNotNull(resourceID);
        checkNotNull(jobMasterGateway);

        TaskManagerActions taskManagerActions = new TaskManagerActionsImpl(jobMasterGateway);

        CheckpointResponder checkpointResponder = new RpcCheckpointResponder(jobMasterGateway);
        GlobalAggregateManager aggregateManager = new RpcGlobalAggregateManager(jobMasterGateway);

        PartitionProducerStateChecker partitionStateChecker =
                new RpcPartitionStateChecker(jobMasterGateway);

        registerQueryableState(job.getJobId(), jobMasterGateway);

        return job.connect(
                resourceID,
                jobMasterGateway,
                taskManagerActions,
                checkpointResponder,
                aggregateManager,
                partitionStateChecker);
    }

    private void disassociateFromJobManager(
            JobTable.Connection jobManagerConnection, Exception cause) throws IOException {
        checkNotNull(jobManagerConnection);

        final JobID jobId = jobManagerConnection.getJobId();

        // cleanup remaining partitions once all tasks for this job have completed
        scheduleResultPartitionCleanup(jobId);

        final KvStateRegistry kvStateRegistry = kvStateService.getKvStateRegistry();

        if (kvStateRegistry != null) {
            kvStateRegistry.unregisterListener(jobId);
        }

        final KvStateClientProxy kvStateClientProxy = kvStateService.getKvStateClientProxy();

        if (kvStateClientProxy != null) {
            kvStateClientProxy.updateKvStateLocationOracle(jobManagerConnection.getJobId(), null);
        }

        JobMasterGateway jobManagerGateway = jobManagerConnection.getJobManagerGateway();
        jobManagerGateway.disconnectTaskManager(getResourceID(), cause);
    }

    private void handleRejectedJobManagerConnection(
            JobID jobId, String targetAddress, JMTMRegistrationRejection rejection) {
        log.info(
                "The JobManager under {} rejected the registration for job {}: {}. Releasing all job related resources.",
                targetAddress,
                jobId,
                rejection.getReason());

        releaseJobResources(
                jobId,
                new FlinkException(
                        String.format("JobManager %s has rejected the registration.", jobId)));
    }

    private void releaseJobResources(JobID jobId, Exception cause) {
        log.debug("Releasing job resources for job {}.", jobId, cause);

        if (partitionTracker.isTrackingPartitionsFor(jobId)) {
            // stop tracking job partitions
            partitionTracker.stopTrackingAndReleaseJobPartitionsFor(jobId);
        }

        // free slots
        final Set allocationIds = taskSlotTable.getAllocationIdsPerJob(jobId);

        if (!allocationIds.isEmpty()) {
            for (AllocationID allocationId : allocationIds) {
                freeSlotInternal(allocationId, cause);
            }
        }

        jobLeaderService.removeJob(jobId);
        jobTable.getJob(jobId)
                .ifPresent(
                        job -> {
                            closeJob(job, cause);
                        });
        taskManagerMetricGroup.removeJobMetricsGroup(jobId);
        changelogStoragesManager.releaseResourcesForJob(jobId);
        currentSlotOfferPerJob.remove(jobId);
        channelStateExecutorFactoryManager.releaseResourcesForJob(jobId);
    }

    private void scheduleResultPartitionCleanup(JobID jobId) {
        final Collection> taskTerminationFutures =
                taskResultPartitionCleanupFuturesPerJob.remove(jobId);
        if (taskTerminationFutures != null) {
            FutureUtils.waitForAll(taskTerminationFutures)
                    .thenRunAsync(
                            () -> {
                                partitionTracker.stopTrackingAndReleaseJobPartitionsFor(jobId);
                            },
                            getMainThreadExecutor());
        }
    }

    private void registerQueryableState(JobID jobId, JobMasterGateway jobMasterGateway) {
        final KvStateServer kvStateServer = kvStateService.getKvStateServer();
        final KvStateRegistry kvStateRegistry = kvStateService.getKvStateRegistry();

        if (kvStateServer != null && kvStateRegistry != null) {
            kvStateRegistry.registerListener(
                    jobId,
                    new RpcKvStateRegistryListener(
                            jobMasterGateway, kvStateServer.getServerAddress()));
        }

        final KvStateClientProxy kvStateProxy = kvStateService.getKvStateClientProxy();

        if (kvStateProxy != null) {
            kvStateProxy.updateKvStateLocationOracle(jobId, jobMasterGateway);
        }
    }

    // ------------------------------------------------------------------------
    //  Internal task methods
    // ------------------------------------------------------------------------

    private void failTask(final ExecutionAttemptID executionAttemptID, final Throwable cause) {
        final Task task = taskSlotTable.getTask(executionAttemptID);

        if (task != null) {
            try {
                task.failExternally(cause);
            } catch (Throwable t) {
                log.error("Could not fail task {}.", executionAttemptID, t);
            }
        } else {
            log.info(
                    "Cannot find task to fail for execution {} with exception:",
                    executionAttemptID,
                    cause);
        }
    }

    private void updateTaskExecutionState(
            final JobMasterGateway jobMasterGateway, final TaskExecutionState taskExecutionState) {
        final ExecutionAttemptID executionAttemptID = taskExecutionState.getID();

        CompletableFuture futureAcknowledge =
                jobMasterGateway.updateTaskExecutionState(taskExecutionState);

        futureAcknowledge.whenCompleteAsync(
                (ack, throwable) -> {
                    if (throwable != null) {
                        failTask(executionAttemptID, throwable);
                    }
                },
                getMainThreadExecutor());
    }

    private void unregisterTaskAndNotifyFinalState(
            final JobMasterGateway jobMasterGateway, final ExecutionAttemptID executionAttemptID) {

        Task task = taskSlotTable.removeTask(executionAttemptID);
        if (task != null) {
            if (!task.getExecutionState().isTerminal()) {
                try {
                    task.failExternally(
                            new IllegalStateException("Task is being remove from TaskManager."));
                } catch (Exception e) {
                    log.error("Could not properly fail task.", e);
                }
            }

            log.info(
                    "Un-registering task and sending final execution state {} to JobManager for task {} {}.",
                    task.getExecutionState(),
                    task.getTaskInfo().getTaskNameWithSubtasks(),
                    task.getExecutionId());

            AccumulatorSnapshot accumulatorSnapshot = task.getAccumulatorRegistry().getSnapshot();

            updateTaskExecutionState(
                    jobMasterGateway,
                    new TaskExecutionState(
                            task.getExecutionId(),
                            task.getExecutionState(),
                            task.getFailureCause(),
                            accumulatorSnapshot,
                            task.getMetricGroup().getIOMetricGroup().createSnapshot()));
        } else {
            log.error("Cannot find task with ID {} to unregister.", executionAttemptID);
        }
    }

    private void freeSlotInternal(AllocationID allocationId, Throwable cause) {
        checkNotNull(allocationId);

        // only respond to freeing slots when not shutting down to avoid freeing slot allocation
        // information
        if (isRunning()) {
            log.debug(
                    "Free slot with allocation id {} because: {}",
                    allocationId,
                    cause.getMessage());

            try {
                final JobID jobId = taskSlotTable.getOwningJob(allocationId);

                final int slotIndex = taskSlotTable.freeSlot(allocationId, cause);

                slotAllocationSnapshotPersistenceService.deleteAllocationSnapshot(slotIndex);

                if (slotIndex != -1) {

                    if (isConnectedToResourceManager()) {
                        // the slot was freed. Tell the RM about it
                        ResourceManagerGateway resourceManagerGateway =
                                establishedResourceManagerConnection.getResourceManagerGateway();

                        resourceManagerGateway.notifySlotAvailable(
                                establishedResourceManagerConnection
                                        .getTaskExecutorRegistrationId(),
                                new SlotID(getResourceID(), slotIndex),
                                allocationId);
                    }

                    if (jobId != null) {
                        closeJobManagerConnectionIfNoAllocatedResources(jobId);
                    }
                }
            } catch (SlotNotFoundException e) {
                log.debug("Could not free slot for allocation id {}.", allocationId, e);
            }

            localStateStoresManager.releaseLocalStateForAllocationId(allocationId);
        } else {
            log.debug(
                    "Ignoring the freeing of slot {} because the TaskExecutor is shutting down.",
                    allocationId);
        }
    }

    private void closeJobManagerConnectionIfNoAllocatedResources(JobID jobId) {
        // check whether we still have allocated slots for the same job
        if (taskSlotTable.getAllocationIdsPerJob(jobId).isEmpty()
                && !partitionTracker.isTrackingPartitionsFor(jobId)) {
            // we can remove the job from the job leader service

            final FlinkExpectedException cause =
                    new FlinkExpectedException(
                            "TaskExecutor "
                                    + getAddress()
                                    + " has no more allocated slots for job "
                                    + jobId
                                    + '.');

            releaseJobResources(jobId, cause);
        }
    }

    private void timeoutSlot(AllocationID allocationId, UUID ticket) {
        checkNotNull(allocationId);
        checkNotNull(ticket);

        if (taskSlotTable.isValidTimeout(allocationId, ticket)) {
            freeSlotInternal(
                    allocationId, new Exception("The slot " + allocationId + " has timed out."));
        } else {
            log.debug(
                    "Received an invalid timeout for allocation id {} with ticket {}.",
                    allocationId,
                    ticket);
        }
    }

    /**
     * Syncs the TaskExecutor's view on its allocated slots with the JobMaster's view. Slots which
     * are no longer reported by the JobMaster are being freed. Slots which the JobMaster thinks it
     * still owns but which are no longer allocated to it will be failed via {@link
     * JobMasterGateway#failSlot}.
     *
     * @param jobMasterGateway jobMasterGateway to talk to the connected job master
     * @param allocatedSlotReport represents the JobMaster's view on the current slot allocation
     *     state
     */
    private void syncSlotsWithSnapshotFromJobMaster(
            JobMasterGateway jobMasterGateway, AllocatedSlotReport allocatedSlotReport) {
        failNoLongerAllocatedSlots(allocatedSlotReport, jobMasterGateway);
        freeNoLongerUsedSlots(allocatedSlotReport);
    }

    private void failNoLongerAllocatedSlots(
            AllocatedSlotReport allocatedSlotReport, JobMasterGateway jobMasterGateway) {
        for (AllocatedSlotInfo allocatedSlotInfo : allocatedSlotReport.getAllocatedSlotInfos()) {
            final AllocationID allocationId = allocatedSlotInfo.getAllocationId();
            if (!taskSlotTable.isAllocated(
                    allocatedSlotInfo.getSlotIndex(),
                    allocatedSlotReport.getJobId(),
                    allocationId)) {
                jobMasterGateway.failSlot(
                        getResourceID(),
                        allocationId,
                        new FlinkException(
                                String.format(
                                        "Slot %s on TaskExecutor %s is not allocated by job %s.",
                                        allocatedSlotInfo.getSlotIndex(),
                                        getResourceID().getStringWithMetadata(),
                                        allocatedSlotReport.getJobId())));
            }
        }
    }

    private void freeNoLongerUsedSlots(AllocatedSlotReport allocatedSlotReport) {
        final Set activeSlots =
                taskSlotTable.getActiveTaskSlotAllocationIdsPerJob(allocatedSlotReport.getJobId());
        final Set reportedSlots =
                allocatedSlotReport.getAllocatedSlotInfos().stream()
                        .map(AllocatedSlotInfo::getAllocationId)
                        .collect(Collectors.toSet());

        final Sets.SetView difference = Sets.difference(activeSlots, reportedSlots);

        for (AllocationID allocationID : difference) {
            freeSlotInternal(
                    allocationID,
                    new FlinkException(
                            String.format(
                                    "%s is no longer allocated by job %s.",
                                    allocationID, allocatedSlotReport.getJobId())));
        }
    }

    private void tryPersistAllocationSnapshot(SlotAllocationSnapshot slotAllocationSnapshot) {
        try {
            slotAllocationSnapshotPersistenceService.persistAllocationSnapshot(
                    slotAllocationSnapshot);
        } catch (IOException e) {
            log.debug("Cannot persist the slot allocation snapshot {}.", slotAllocationSnapshot, e);
        }
    }

    /**
     * This method tries to repopulate the {@link JobTable} and {@link TaskSlotTable} from the local
     * filesystem in a best-effort manner.
     */
    private void tryLoadLocalAllocationSnapshots() {
        Collection slotAllocationSnapshots =
                slotAllocationSnapshotPersistenceService.loadAllocationSnapshots();

        log.debug("Recovered slot allocation snapshots {}.", slotAllocationSnapshots);

        final Set allocatedSlots = new HashSet<>();
        for (SlotAllocationSnapshot slotAllocationSnapshot : slotAllocationSnapshots) {
            try {
                allocateSlotForJob(
                        slotAllocationSnapshot.getJobId(),
                        slotAllocationSnapshot.getSlotID(),
                        slotAllocationSnapshot.getAllocationId(),
                        slotAllocationSnapshot.getResourceProfile(),
                        slotAllocationSnapshot.getJobTargetAddress());

            } catch (SlotAllocationException e) {
                log.debug("Cannot reallocate restored slot {}.", slotAllocationSnapshot, e);
            }

            allocatedSlots.add(slotAllocationSnapshot.getAllocationId());
        }

        localStateStoresManager.retainLocalStateForAllocations(allocatedSlots);
    }

    // ------------------------------------------------------------------------
    //  Internal utility methods
    // ------------------------------------------------------------------------

    private boolean isConnectedToResourceManager() {
        return establishedResourceManagerConnection != null;
    }

    private boolean isConnectedToResourceManager(ResourceManagerId resourceManagerId) {
        return establishedResourceManagerConnection != null
                && resourceManagerAddress != null
                && resourceManagerAddress.getResourceManagerId().equals(resourceManagerId);
    }

    private boolean isJobManagerConnectionValid(JobID jobId, JobMasterId jobMasterId) {
        return jobTable.getConnection(jobId)
                .map(jmConnection -> Objects.equals(jmConnection.getJobMasterId(), jobMasterId))
                .orElse(false);
    }

    private CompletableFuture requestFileUploadByFilePath(
            String filePath, String fileTag) {
        log.debug("Received file upload request for file {}", fileTag);
        if (!StringUtils.isNullOrWhitespaceOnly(filePath)) {
            return CompletableFuture.supplyAsync(
                    () -> {
                        final File file = new File(filePath);
                        if (file.exists()) {
                            try {
                                return putTransientBlobStream(new FileInputStream(file), fileTag)
                                        .get();
                            } catch (Exception e) {
                                log.debug("Could not upload file {}.", fileTag, e);
                                throw new CompletionException(
                                        new FlinkException(
                                                "Could not upload file " + fileTag + '.', e));
                            }
                        } else {
                            log.debug(
                                    "The file {} does not exist on the TaskExecutor {}.",
                                    fileTag,
                                    getResourceID().getStringWithMetadata());
                            throw new CompletionException(
                                    new FlinkException(
                                            "The file "
                                                    + fileTag
                                                    + " does not exist on the TaskExecutor."));
                        }
                    },
                    ioExecutor);
        } else {
            log.debug(
                    "The file {} is unavailable on the TaskExecutor {}.",
                    fileTag,
                    getResourceID().getStringWithMetadata());
            return FutureUtils.completedExceptionally(
                    new FlinkException(
                            "The file " + fileTag + " is not available on the TaskExecutor."));
        }
    }

    private CompletableFuture putTransientBlobStream(
            InputStream inputStream, String fileTag) {
        final TransientBlobService transientBlobService =
                taskExecutorBlobService.getTransientBlobService();
        final TransientBlobKey transientBlobKey;

        try {
            transientBlobKey = transientBlobService.putTransient(inputStream);
        } catch (IOException e) {
            log.debug("Could not upload file {}.", fileTag, e);
            return FutureUtils.completedExceptionally(
                    new FlinkException("Could not upload file " + fileTag + '.', e));
        }
        return CompletableFuture.completedFuture(transientBlobKey);
    }

    // ------------------------------------------------------------------------
    //  Properties
    // ------------------------------------------------------------------------

    public ResourceID getResourceID() {
        return unresolvedTaskManagerLocation.getResourceID();
    }

    // ------------------------------------------------------------------------
    //  Error Handling
    // ------------------------------------------------------------------------

    /**
     * Notifies the TaskExecutor that a fatal error has occurred and it cannot proceed.
     *
     * @param t The exception describing the fatal error
     */
    void onFatalError(final Throwable t) {
        try {
            log.error("Fatal error occurred in TaskExecutor {}.", getAddress(), t);
        } catch (Throwable ignored) {
        }

        // The fatal error handler implementation should make sure that this call is non-blocking
        fatalErrorHandler.onFatalError(t);
    }

    // ------------------------------------------------------------------------
    //  Access to fields for testing
    // ------------------------------------------------------------------------

    @VisibleForTesting
    TaskExecutorToResourceManagerConnection getResourceManagerConnection() {
        return resourceManagerConnection;
    }

    @VisibleForTesting
    HeartbeatManager getResourceManagerHeartbeatManager() {
        return resourceManagerHeartbeatManager;
    }

    // ------------------------------------------------------------------------
    //  Utility classes
    // ------------------------------------------------------------------------

    private static final class JobManagerHeartbeatReceiver
            extends HeartbeatReceiver {
        private final JobMasterGateway jobMasterGateway;

        private JobManagerHeartbeatReceiver(JobMasterGateway jobMasterGateway) {
            this.jobMasterGateway = jobMasterGateway;
        }

        @Override
        public CompletableFuture receiveHeartbeat(
                ResourceID resourceID, TaskExecutorToJobManagerHeartbeatPayload payload) {
            return jobMasterGateway.heartbeatFromTaskManager(resourceID, payload);
        }
    }

    private static final class ResourceManagerHeartbeatReceiver
            extends HeartbeatReceiver {
        private final ResourceManagerGateway resourceManagerGateway;

        private ResourceManagerHeartbeatReceiver(ResourceManagerGateway resourceManagerGateway) {
            this.resourceManagerGateway = resourceManagerGateway;
        }

        @Override
        public CompletableFuture receiveHeartbeat(
                ResourceID resourceID, TaskExecutorHeartbeatPayload heartbeatPayload) {
            return resourceManagerGateway.heartbeatFromTaskManager(resourceID, heartbeatPayload);
        }
    }

    /** The listener for leader changes of the resource manager. */
    private final class ResourceManagerLeaderListener implements LeaderRetrievalListener {

        @Override
        public void notifyLeaderAddress(final String leaderAddress, final UUID leaderSessionID) {
            runAsync(
                    () ->
                            notifyOfNewResourceManagerLeader(
                                    leaderAddress,
                                    ResourceManagerId.fromUuidOrNull(leaderSessionID)));
        }

        @Override
        public void handleError(Exception exception) {
            onFatalError(exception);
        }
    }

    private final class JobLeaderListenerImpl implements JobLeaderListener {

        @Override
        public void jobManagerGainedLeadership(
                final JobID jobId,
                final JobMasterGateway jobManagerGateway,
                final JMTMRegistrationSuccess registrationMessage) {
            runAsync(
                    () ->
                            jobTable.getJob(jobId)
                                    .ifPresent(
                                            job ->
                                                    establishJobManagerConnection(
                                                            job,
                                                            jobManagerGateway,
                                                            registrationMessage)));
        }

        @Override
        public void jobManagerLostLeadership(final JobID jobId, final JobMasterId jobMasterId) {
            log.info(
                    "JobManager for job {} with leader id {} lost leadership.", jobId, jobMasterId);

            runAsync(
                    () ->
                            jobTable.getConnection(jobId)
                                    .ifPresent(
                                            jobManagerConnection ->
                                                    disconnectJobManagerConnection(
                                                            jobManagerConnection,
                                                            new Exception(
                                                                    "Job leader for job id "
                                                                            + jobId
                                                                            + " lost leadership."))));
        }

        @Override
        public void handleError(Throwable throwable) {
            onFatalError(throwable);
        }

        @Override
        public void jobManagerRejectedRegistration(
                JobID jobId, String targetAddress, JMTMRegistrationRejection rejection) {
            runAsync(() -> handleRejectedJobManagerConnection(jobId, targetAddress, rejection));
        }
    }

    private final class ResourceManagerRegistrationListener
            implements RegistrationConnectionListener<
                    TaskExecutorToResourceManagerConnection,
                    TaskExecutorRegistrationSuccess,
                    TaskExecutorRegistrationRejection> {

        @Override
        public void onRegistrationSuccess(
                TaskExecutorToResourceManagerConnection connection,
                TaskExecutorRegistrationSuccess success) {
            final ResourceID resourceManagerId = success.getResourceManagerId();
            final InstanceID taskExecutorRegistrationId = success.getRegistrationId();
            final ClusterInformation clusterInformation = success.getClusterInformation();
            final ResourceManagerGateway resourceManagerGateway = connection.getTargetGateway();

            byte[] tokens = success.getInitialTokens();
            if (tokens != null) {
                try {
                    log.info("Receive initial delegation tokens from resource manager");
                    delegationTokenReceiverRepository.onNewTokensObtained(tokens);
                } catch (Throwable t) {
                    log.error("Could not update delegation tokens.", t);
                    ExceptionUtils.rethrowIfFatalError(t);
                }
            }

            runAsync(
                    () -> {
                        // filter out outdated connections
                        //noinspection ObjectEquality
                        if (resourceManagerConnection == connection) {
                            try {
                                establishResourceManagerConnection(
                                        resourceManagerGateway,
                                        resourceManagerId,
                                        taskExecutorRegistrationId,
                                        clusterInformation);
                            } catch (Throwable t) {
                                log.error(
                                        "Establishing Resource Manager connection in Task Executor failed",
                                        t);
                            }
                        }
                    });
        }

        @Override
        public void onRegistrationFailure(Throwable failure) {
            onFatalError(failure);
        }

        @Override
        public void onRegistrationRejection(
                String targetAddress, TaskExecutorRegistrationRejection rejection) {
            onFatalError(
                    new FlinkException(
                            String.format(
                                    "The TaskExecutor's registration at the ResourceManager %s has been rejected: %s",
                                    targetAddress, rejection)));
        }
    }

    private final class TaskManagerActionsImpl implements TaskManagerActions {
        private final JobMasterGateway jobMasterGateway;

        private TaskManagerActionsImpl(JobMasterGateway jobMasterGateway) {
            this.jobMasterGateway = checkNotNull(jobMasterGateway);
        }

        @Override
        public void notifyFatalError(String message, Throwable cause) {
            try {
                log.error(message, cause);
            } catch (Throwable ignored) {
            }

            // The fatal error handler implementation should make sure that this call is
            // non-blocking
            fatalErrorHandler.onFatalError(cause);
        }

        @Override
        public void failTask(final ExecutionAttemptID executionAttemptID, final Throwable cause) {
            runAsync(() -> TaskExecutor.this.failTask(executionAttemptID, cause));
        }

        @Override
        public void updateTaskExecutionState(final TaskExecutionState taskExecutionState) {
            if (taskExecutionState.getExecutionState().isTerminal()) {
                runAsync(
                        () ->
                                unregisterTaskAndNotifyFinalState(
                                        jobMasterGateway, taskExecutionState.getID()));
            } else {
                TaskExecutor.this.updateTaskExecutionState(jobMasterGateway, taskExecutionState);
            }
        }
    }

    private class SlotActionsImpl implements SlotActions {

        @Override
        public void freeSlot(final AllocationID allocationId) {
            runAsync(
                    () ->
                            freeSlotInternal(
                                    allocationId,
                                    new FlinkException(
                                            "TaskSlotTable requested freeing the TaskSlot "
                                                    + allocationId
                                                    + '.')));
        }

        @Override
        public void timeoutSlot(final AllocationID allocationId, final UUID ticket) {
            runAsync(() -> TaskExecutor.this.timeoutSlot(allocationId, ticket));
        }
    }

    private class JobManagerHeartbeatListener
            implements HeartbeatListener<
                    AllocatedSlotReport, TaskExecutorToJobManagerHeartbeatPayload> {

        @Override
        public void notifyHeartbeatTimeout(final ResourceID resourceID) {
            final String message =
                    String.format(
                            "The heartbeat of JobManager with id %s timed out.",
                            resourceID.getStringWithMetadata());

            log.info(message);
            handleJobManagerConnectionLoss(resourceID, new TimeoutException(message));
        }

        @Override
        public void notifyTargetUnreachable(ResourceID resourceID) {
            final String message =
                    String.format(
                            "JobManager with id %s is no longer reachable.",
                            resourceID.getStringWithMetadata());
            log.info(message);

            handleJobManagerConnectionLoss(resourceID, new TaskManagerException(message));
        }

        private void handleJobManagerConnectionLoss(ResourceID resourceID, Exception cause) {
            validateRunsInMainThread();
            jobTable.getConnection(resourceID)
                    .ifPresent(
                            jobManagerConnection ->
                                    disconnectAndTryReconnectToJobManager(
                                            jobManagerConnection, cause));
        }

        @Override
        public void reportPayload(ResourceID resourceID, AllocatedSlotReport allocatedSlotReport) {
            validateRunsInMainThread();
            OptionalConsumer.of(jobTable.getConnection(allocatedSlotReport.getJobId()))
                    .ifPresent(
                            jobManagerConnection -> {
                                syncSlotsWithSnapshotFromJobMaster(
                                        jobManagerConnection.getJobManagerGateway(),
                                        allocatedSlotReport);
                            })
                    .ifNotPresent(
                            () ->
                                    log.debug(
                                            "Ignoring allocated slot report from job {} because there is no active leader.",
                                            allocatedSlotReport.getJobId()));
        }

        @Override
        public TaskExecutorToJobManagerHeartbeatPayload retrievePayload(ResourceID resourceID) {
            validateRunsInMainThread();
            return jobTable.getConnection(resourceID)
                    .map(
                            jobManagerConnection -> {
                                JobID jobId = jobManagerConnection.getJobId();

                                Set deployedExecutions = new HashSet<>();
                                List accumulatorSnapshots =
                                        new ArrayList<>(16);
                                Iterator allTasks = taskSlotTable.getTasks(jobId);

                                while (allTasks.hasNext()) {
                                    Task task = allTasks.next();
                                    deployedExecutions.add(task.getExecutionId());
                                    accumulatorSnapshots.add(
                                            task.getAccumulatorRegistry().getSnapshot());
                                }
                                return new TaskExecutorToJobManagerHeartbeatPayload(
                                        new AccumulatorReport(accumulatorSnapshots),
                                        new ExecutionDeploymentReport(deployedExecutions));
                            })
                    .orElseGet(TaskExecutorToJobManagerHeartbeatPayload::empty);
        }
    }

    private class ResourceManagerHeartbeatListener
            implements HeartbeatListener {

        @Override
        public void notifyHeartbeatTimeout(final ResourceID resourceId) {
            final String message =
                    String.format(
                            "The heartbeat of ResourceManager with id %s timed out.",
                            resourceId.getStringWithMetadata());

            log.info(message);
            handleResourceManagerConnectionLoss(resourceId, new TaskManagerException(message));
        }

        @Override
        public void notifyTargetUnreachable(ResourceID resourceID) {
            final String message =
                    String.format(
                            "ResourceManager with id %s is no longer reachable.",
                            resourceID.getStringWithMetadata());
            log.info(message);

            handleResourceManagerConnectionLoss(resourceID, new TaskManagerException(message));
        }

        private void handleResourceManagerConnectionLoss(
                ResourceID resourceId, TaskManagerException cause) {
            validateRunsInMainThread();

            // first check whether the timeout is still valid
            if (establishedResourceManagerConnection != null
                    && establishedResourceManagerConnection
                            .getResourceManagerResourceId()
                            .equals(resourceId)) {

                reconnectToResourceManager(cause);
            }
        }

        @Override
        public void reportPayload(ResourceID resourceID, Void payload) {
            // nothing to do since the payload is of type Void
        }

        @Override
        public TaskExecutorHeartbeatPayload retrievePayload(ResourceID resourceID) {
            validateRunsInMainThread();
            return new TaskExecutorHeartbeatPayload(
                    taskSlotTable.createSlotReport(getResourceID()),
                    partitionTracker.createClusterPartitionReport());
        }
    }

    @VisibleForTesting
    static final class TaskExecutorJobServices implements JobTable.JobServices {

        private final LibraryCacheManager.ClassLoaderLease classLoaderLease;

        private final Runnable closeHook;

        private TaskExecutorJobServices(
                LibraryCacheManager.ClassLoaderLease classLoaderLease, Runnable closeHook) {
            this.classLoaderLease = classLoaderLease;
            this.closeHook = closeHook;
        }

        @Override
        public LibraryCacheManager.ClassLoaderHandle getClassLoaderHandle() {
            return classLoaderLease;
        }

        @Override
        public void close() {
            classLoaderLease.release();
            closeHook.run();
        }

        @VisibleForTesting
        static TaskExecutorJobServices create(
                LibraryCacheManager.ClassLoaderLease classLoaderLease, Runnable closeHook) {
            return new TaskExecutorJobServices(classLoaderLease, closeHook);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy