// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.resourcemanager.resultpartitionmaster;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.runtime.concurrent.ScheduledExecutor;
import org.apache.flink.runtime.instance.InstanceID;
import org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection;
import org.apache.flink.runtime.taskexecutor.ResultPartitionReport;
import org.apache.flink.runtime.taskexecutor.ResultPartitionStatus;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* The result partition master is responsible for maintaining a view of all the remaining finished
* result partitions. It helps to clear all the result partitions when a job finishes. Besides, to
* avoid the resource leaks, it periodically clears the result partitions who do not belong to an
* alive job.
*/
public class ResultPartitionMaster implements AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(ResultPartitionMaster.class);
/** All currently registered task managers. */
private final HashMap taskManagerRegistrations = new HashMap<>(4);
/** The finished result partition grouped by jobs resides on each task executor. */
private final Map>> resultPartitions = new HashMap<>();
/** Scheduled executor for timeouts. */
private final ScheduledExecutor scheduledExecutor;
/**
* The jobs that are active or waiting to be timeout. Result partitions belongs to jobs not included in the map
* should be cleared.
*/
private final Map registeredJobs = new HashMap<>();
/** Executor for future callbacks which have to be "synchronized". */
private Executor mainThreadExecutor;
/** The timeout for a result partition to be released after the corresponding job master disconnected in millisecond. */
private Time resultPartitionDisconnectJmTimeout;
/** The task to check result partitions belongs to timeout jobs. */
private ScheduledFuture resultPartitionDisconnectJmTimeoutChecker;
/** True iff the component has been started. */
private boolean started;
public ResultPartitionMaster(ScheduledExecutor scheduledExecutor, Time resultPartitionDisconnectJmTimeout) {
this.scheduledExecutor = scheduledExecutor;
this.resultPartitionDisconnectJmTimeout = resultPartitionDisconnectJmTimeout;
}
/**
* Starts the result partition master.
*
* @param newMainThreadExecutor to use to run code in the ResourceManager's main thread.
*/
public void start(Executor newMainThreadExecutor) {
started = true;
this.mainThreadExecutor = checkNotNull(newMainThreadExecutor);
resultPartitionDisconnectJmTimeoutChecker = scheduledExecutor.scheduleWithFixedDelay(
() -> mainThreadExecutor.execute(this::checkResultPartitionDisconnectJmTimeout),
resultPartitionDisconnectJmTimeout.toMilliseconds(),
resultPartitionDisconnectJmTimeout.toMilliseconds(),
TimeUnit.MILLISECONDS);
}
/**
* Suspends the components and clear its states.
*/
public void suspend() {
LOG.info("Suspending the ResultPartitionMaster");
if (resultPartitionDisconnectJmTimeoutChecker != null) {
resultPartitionDisconnectJmTimeoutChecker.cancel(false);
resultPartitionDisconnectJmTimeoutChecker = null;
}
ArrayList registeredTaskManagers = new ArrayList<>(taskManagerRegistrations.keySet());
for (InstanceID registeredTaskManager : registeredTaskManagers) {
unregisterTaskManager(registeredTaskManager);
}
started = false;
}
/**
* Close the result partition master.
*/
@Override
public void close() throws Exception {
LOG.info("Close the result partition master");
suspend();
}
/**
* Registers a new task manager into the result partition master.
*
* @param taskExecutorConnection for the new task manager.
* @param initialResultPartitionReport for the new task manager.
*/
public void registerTaskManager(final TaskExecutorConnection taskExecutorConnection, ResultPartitionReport initialResultPartitionReport) {
checkInit();
LOG.info("Registering TaskManager {} under {} at the ResultPartitionMaster.", taskExecutorConnection.getResourceID(), taskExecutorConnection.getInstanceID());
// we identify task managers by their instance id
if (!taskManagerRegistrations.containsKey(taskExecutorConnection.getInstanceID())) {
taskManagerRegistrations.put(taskExecutorConnection.getInstanceID(), taskExecutorConnection);
}
reportResultPartitionStatus(taskExecutorConnection.getInstanceID(), initialResultPartitionReport);
}
/**
* Unregisters a task manager from the result partition master.
*
* @param instanceId of the unregistered task manager.
* @return Whether the unregistration succeeds
*/
public boolean unregisterTaskManager(InstanceID instanceId) {
TaskExecutorConnection taskManagerRegistration = taskManagerRegistrations.remove(instanceId);
if (null != taskManagerRegistration) {
resultPartitions.remove(instanceId);
return true;
} else {
LOG.debug("There is no task manager registered with instance ID {}. Ignoring this message.", instanceId);
return false;
}
}
/**
* Reports the finished result partitions resides on the task manager currently. The maintained list will be override
* by the reported list.
*
* @param instanceId of the reported task manager.
* @param resultPartitionReport the reported status of the finished result partitions.
*/
public void reportResultPartitionStatus(InstanceID instanceId, ResultPartitionReport resultPartitionReport) {
checkInit();
LOG.debug("Received result partition report from instance {}.", instanceId);
Map> resultPartitionsOnTm = resultPartitions.get(instanceId);
if (resultPartitionsOnTm != null) {
resultPartitionsOnTm.clear();
} else {
resultPartitionsOnTm = new HashMap<>();
resultPartitions.put(instanceId, resultPartitionsOnTm);
}
for (ResultPartitionStatus status : resultPartitionReport) {
resultPartitionsOnTm.computeIfAbsent(status.getJobId(), k -> new HashSet<>()).add(status);
}
}
/**
* Registers a new finished result partition on a task manager. The new result partition will be inserted into
* the list of that task manager directly.
*
* @param instanceId of the task manager to register.
* @param resultPartitionStatus The status of the newly registered result partition.
*/
public void addFinishedResultPartition(InstanceID instanceId, ResultPartitionStatus resultPartitionStatus) {
checkInit();
Map> resultPartitionsOnTm = resultPartitions.get(instanceId);
if (resultPartitionsOnTm != null) {
resultPartitionsOnTm.computeIfAbsent(resultPartitionStatus.getJobId(), k -> new HashSet<>())
.add(resultPartitionStatus);
}
}
/**
* Releases all the result partitions belonging to the specific job.
*
* @param jobId The job id of the job to release.
*/
public void releaseResultPartitions(JobID jobId) {
checkInit();
taskManagerRegistrations.forEach((instanceId, connection) -> {
Map> resultPartitionsOnTm = resultPartitions.get(instanceId);
if (resultPartitionsOnTm != null && resultPartitionsOnTm.containsKey(jobId)) {
resultPartitionsOnTm.remove(jobId);
connection.getTaskExecutorGateway().releaseResultPartitions(jobId);
}
});
}
/**
* Notifies a new job has started or recovered.
*
* @param jobId the newly registered job id.
*/
public void registerJob(JobID jobId) {
checkInit();
RegisteredJobStatus jobStatus = registeredJobs.compute(
jobId, (k, v) -> v == null ? new RegisteredJobStatus(jobId) : v);
jobStatus.markOnline();
}
/**
* Notifies a job has been finished or disconnected.
*
* @param jobId the newly unregistered job id.
*/
public void unregisterJob(JobID jobId) {
checkInit();
RegisteredJobStatus jobStatus = registeredJobs.get(jobId);
if (jobStatus != null) {
jobStatus.markOffline();
} else {
LOG.warn("Unregistered a job that has not been registered before, job id is " + jobId);
}
}
// ---------------------------------------------------------------------------------------------
// Internal timeout methods
// ---------------------------------------------------------------------------------------------
/**
* Checks and clears the result partitions that has been without a corresponding time for too long time.
*/
private void checkResultPartitionDisconnectJmTimeout() {
checkInit();
Set timeoutJobs = new HashSet<>();
registeredJobs.forEach((jobId, jobStatus) -> {
if (jobStatus.isTimeout(resultPartitionDisconnectJmTimeout.toMilliseconds())) {
timeoutJobs.add(jobId);
}
});
for (JobID jobId : timeoutJobs) {
registeredJobs.remove(jobId);
}
resultPartitions.forEach((instanceID, resultPartitionsOnTm) -> {
TaskExecutorConnection connection = taskManagerRegistrations.get(instanceID);
if (connection != null) {
Set jobsToRemove = new HashSet<>();
Iterator jobsIterator = resultPartitionsOnTm.keySet().iterator();
while (jobsIterator.hasNext()) {
JobID jobId = jobsIterator.next();
if (!registeredJobs.containsKey(jobId)) {
jobsToRemove.add(jobId);
jobsIterator.remove();
}
}
for (JobID jobId : jobsToRemove) {
connection.getTaskExecutorGateway().releaseResultPartitions(jobId);
}
} else {
LOG.warn("No connection found with task manager " + instanceID +
", but there are remaining result partitions recorded");
}
});
}
@VisibleForTesting
HashMap getTaskManagerRegistrations() {
return taskManagerRegistrations;
}
@VisibleForTesting
Map>> getResultPartitions() {
return resultPartitions;
}
// ---------------------------------------------------------------------------------------------
// Internal utility methods
// ---------------------------------------------------------------------------------------------
private void checkInit() {
Preconditions.checkState(started, "The slot manager has not been started.");
}
}