All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.dispatcher.DriverDispatcher Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.dispatcher;

import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.DriverConfigConstants;
import org.apache.flink.runtime.blob.BlobServer;
import org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph;
import org.apache.flink.runtime.heartbeat.HeartbeatServices;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.messages.Acknowledge;
import org.apache.flink.runtime.messages.webmonitor.ClusterOverview;
import org.apache.flink.runtime.metrics.groups.JobManagerMetricGroup;
import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.LeaderShipLostHandler;
import org.apache.flink.runtime.rpc.RpcService;

import javax.annotation.Nullable;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CompletableFuture;

/**
 * DriverDispatcher monitors the termination state of jobs in driver mode. Whenever a job reaches a globally
 * terminal state, the dispatcher checks how many jobs are still running or pending in the cluster. The dispatcher
 * exits once there are no running or pending jobs left, or when the driver job finishes exceptionally.
 */
public class DriverDispatcher extends Dispatcher {

	/**
	 * Manage priority assumed for a job whose configuration does not set
	 * {@code DriverConfigConstants.FLINK_DRIVER_MANAGE_PRIORIRY}.
	 */
	private static final int DEFAULT_MANAGE_PRIORITY = 1;

	/**
	 * Timeout passed to the cluster overview request issued after a job terminated.
	 *
	 * <p>NOTE(review): 30000 <em>seconds</em> is roughly 8.3 hours; this looks like a milliseconds value
	 * handed to {@code Time.seconds}. The value is kept unchanged here - confirm the intended timeout.
	 */
	private static final Time CLUSTER_OVERVIEW_TIMEOUT = Time.seconds(30000);

	/** Manage priority of every job submitted through {@link #submitJob(JobGraph, Time)}, keyed by job id. */
	private final Map<JobID, Integer> dispatcherManagerPriorityMap = new HashMap<>();

	/**
	 * Creates the dispatcher by forwarding all arguments to the {@link Dispatcher} constructor. The submitted
	 * job graph store handed to the base class is obtained from {@code highAvailabilityServices}.
	 *
	 * @throws Exception if the base class constructor or the job graph store lookup fails
	 */
	public DriverDispatcher(RpcService rpcService,
							String endpointId,
							Configuration configuration,
							HighAvailabilityServices highAvailabilityServices,
							ResourceManagerGateway resourceManagerGateway,
							BlobServer blobServer,
							HeartbeatServices heartbeatServices,
							JobManagerMetricGroup jobManagerMetricGroup,
							@Nullable String metricServiceQueryPath,
							ArchivedExecutionGraphStore archivedExecutionGraphStore,
							JobManagerRunnerFactory jobManagerRunnerFactory,
							FatalErrorHandler fatalErrorHandler,
							@Nullable String restAddress,
							HistoryServerArchivist historyServerArchivist,
							LeaderShipLostHandler leaderShipLostHandler) throws Exception {
		super(rpcService,
			endpointId,
			configuration,
			highAvailabilityServices,
			highAvailabilityServices.getSubmittedJobGraphStore(),
			resourceManagerGateway,
			blobServer,
			heartbeatServices,
			jobManagerMetricGroup,
			metricServiceQueryPath,
			archivedExecutionGraphStore,
			jobManagerRunnerFactory,
			fatalErrorHandler,
			restAddress,
			historyServerArchivist,
			leaderShipLostHandler);
		log.info("Initiating DriverDispatcher Instance.");
	}

	/**
	 * Records the job's manage priority (read from the job configuration, falling back to
	 * {@link #DEFAULT_MANAGE_PRIORITY}) and then delegates the submission to the base dispatcher.
	 *
	 * @param jobGraph job to submit
	 * @param timeout timeout forwarded to the base implementation
	 * @return the future returned by the base implementation
	 */
	@Override
	public CompletableFuture<Acknowledge> submitJob(JobGraph jobGraph, Time timeout) {
		final int managePriority = jobGraph.getJobConfiguration().getInteger(
			DriverConfigConstants.FLINK_DRIVER_MANAGE_PRIORIRY, DEFAULT_MANAGE_PRIORITY);
		final JobID jobId = jobGraph.getJobID();
		log.info("submit new job, jobId: {}", jobId);
		log.info("{}: {}", DriverConfigConstants.FLINK_DRIVER_MANAGE_PRIORIRY, managePriority);
		dispatcherManagerPriorityMap.put(jobId, managePriority);
		return super.submitJob(jobGraph, timeout);
	}

	/**
	 * Runs the base class handling and then decides whether the dispatcher has to terminate: immediately if
	 * the terminated job was submitted with driver priority and ended in {@link JobStatus#FAILED}, otherwise
	 * once the cluster overview reports no running or pending jobs.
	 *
	 * @param archivedExecutionGraph archived graph of the job that terminated
	 */
	@Override
	protected void jobReachedGloballyTerminalState(ArchivedExecutionGraph archivedExecutionGraph) {
		log.info("Job: {} Reached GloballyTerminalState.", archivedExecutionGraph.getJobName());
		super.jobReachedGloballyTerminalState(archivedExecutionGraph);

		// The job is globally terminal, so its priority entry is dropped here to keep the map from growing
		// with every job ever submitted. The result is null for jobs that were never registered.
		final Integer managePriority = dispatcherManagerPriorityMap.remove(archivedExecutionGraph.getJobID());
		if (isDriverPriority(managePriority) && archivedExecutionGraph.getState() == JobStatus.FAILED) {
			log.error("Driver Job FAILED, terminate driverDispatcher.");
			getTerminationFuture().complete(null);
			return;
		}

		requestClusterOverview(CLUSTER_OVERVIEW_TIMEOUT).whenComplete((overview, failure) -> {
			if (failure != null) {
				// Without an overview we cannot tell whether jobs are left, so the dispatcher is kept alive.
				log.warn("Could not retrieve the cluster overview, driverDispatcher keeps running.", failure);
				return;
			}
			if (overview.getNumJobsRunningOrPending() == 0) {
				log.info("All jobs finished, terminate driverDispatcher.");
				getTerminationFuture().complete(null);
			}
		});
	}

	/**
	 * Runs the base class handling and terminates the dispatcher if the given job was submitted with driver
	 * priority.
	 *
	 * @param jobId id of the job that did not finish
	 */
	@Override
	protected void jobNotFinished(JobID jobId) {
		super.jobNotFinished(jobId);
		if (isDriverPriority(dispatcherManagerPriorityMap.get(jobId))) {
			log.error("Driver Job FAILED, terminate driverDispatcher.");
			getTerminationFuture().complete(null);
		}
	}

	/**
	 * Tells whether a recorded manage priority marks the driver job.
	 *
	 * @param managePriority recorded priority, or {@code null} if the job was never registered via
	 *                       {@link #submitJob(JobGraph, Time)}
	 * @return {@code true} only for a non-null priority equal to the driver priority constant; an unknown job
	 *         is treated as a non-driver job instead of failing on null unboxing
	 */
	private static boolean isDriverPriority(@Nullable Integer managePriority) {
		return managePriority != null
			&& managePriority.intValue() == DriverConfigConstants.FLINK_DRIVER_MANAGE_PRIORITY_DRIVER;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy