// org.apache.flink.runtime.minicluster.MiniClusterJobDispatcher
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.minicluster;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.blob.BlobServer;
import org.apache.flink.runtime.client.JobExecutionException;
import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.flink.runtime.heartbeat.HeartbeatServices;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobmanager.OnCompletionActions;
import org.apache.flink.runtime.jobmaster.JobManagerRunner;
import org.apache.flink.runtime.jobmaster.JobManagerServices;
import org.apache.flink.runtime.metrics.MetricRegistry;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.RpcService;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;
/**
* The dispatcher that runs in the mini cluster, waits for jobs, and starts job masters
* upon receiving jobs.
*/
public class MiniClusterJobDispatcher {

	private static final Logger LOG = LoggerFactory.getLogger(MiniClusterJobDispatcher.class);

	// ------------------------------------------------------------------------

	/** Lock to ensure that this dispatcher executes only one job at a time. */
	private final Object lock = new Object();

	/** The configuration with which the mini cluster was started. */
	private final Configuration configuration;

	/** The RPC services to use by the job managers (one per JobManager). */
	private final RpcService[] rpcServices;

	/** Services for discovery, leader election, and recovery. */
	private final HighAvailabilityServices haServices;

	/** Services for heartbeating. */
	private final HeartbeatServices heartbeatServices;

	/** All the services that the JobManager needs, such as BLOB service, factories, etc. */
	private final JobManagerServices jobManagerServices;

	/** Registry for all metrics in the mini cluster. */
	private final MetricRegistry metricRegistry;

	/** The number of JobManagers to launch (more than one simulates a high-availability setup). */
	private final int numJobManagers;

	/** The runners for the job masters. Non-null if and only if a job is currently running. */
	private volatile JobManagerRunner[] runners;

	/** Flag marking the dispatcher as shut down. */
	private volatile boolean shutdown;

	/**
	 * Starts a mini cluster job dispatcher.
	 *
	 * <p>The dispatcher kicks off one JobManager per job, a behavior similar to a
	 * non-highly-available setup.
	 *
	 * @param config The configuration of the mini cluster
	 * @param rpcService The RPC service used by the single JobManager
	 * @param haServices Access to the discovery, leader election, and recovery services
	 * @param blobServer The BLOB server from which the JobManager services are built
	 * @param heartbeatServices The services used for heartbeating
	 * @param metricRegistry Registry for all metrics in the mini cluster
	 *
	 * @throws Exception Thrown, if the services for the JobMaster could not be started.
	 */
	public MiniClusterJobDispatcher(
			Configuration config,
			RpcService rpcService,
			HighAvailabilityServices haServices,
			BlobServer blobServer,
			HeartbeatServices heartbeatServices,
			MetricRegistry metricRegistry) throws Exception {
		this(
			config,
			haServices,
			blobServer,
			heartbeatServices,
			metricRegistry,
			1,
			new RpcService[] { rpcService });
	}

	/**
	 * Starts a mini cluster job dispatcher.
	 *
	 * <p>The dispatcher may kick off more than one JobManager per job, thus simulating
	 * a highly-available setup.
	 *
	 * @param config The configuration of the mini cluster
	 * @param haServices Access to the discovery, leader election, and recovery services
	 * @param blobServer The BLOB server from which the JobManager services are built
	 * @param heartbeatServices The services used for heartbeating
	 * @param metricRegistry Registry for all metrics in the mini cluster
	 * @param numJobManagers The number of JobMasters to start for each job
	 * @param rpcServices The RPC services to use, one per JobManager (length must equal
	 *                    {@code numJobManagers})
	 *
	 * @throws Exception Thrown, if the services for the JobMaster could not be started.
	 */
	public MiniClusterJobDispatcher(
			Configuration config,
			HighAvailabilityServices haServices,
			BlobServer blobServer,
			HeartbeatServices heartbeatServices,
			MetricRegistry metricRegistry,
			int numJobManagers,
			RpcService[] rpcServices) throws Exception {

		checkArgument(numJobManagers >= 1);
		checkArgument(rpcServices.length == numJobManagers);

		this.configuration = checkNotNull(config);
		this.rpcServices = rpcServices;
		this.haServices = checkNotNull(haServices);
		this.heartbeatServices = checkNotNull(heartbeatServices);
		this.metricRegistry = checkNotNull(metricRegistry);
		this.numJobManagers = numJobManagers;

		LOG.info("Creating JobMaster services");
		this.jobManagerServices = JobManagerServices.fromConfiguration(config, blobServer);
	}

	// ------------------------------------------------------------------------
	//  life cycle
	// ------------------------------------------------------------------------

	/**
	 * Shuts down the mini cluster dispatcher. If a job is currently running, that job will be
	 * terminally failed.
	 *
	 * @throws Exception Thrown, if the JobManagerRunners or the JobManagerServices
	 *                   could not be properly terminated.
	 */
	public void shutdown() throws Exception {
		synchronized (lock) {
			if (!shutdown) {
				shutdown = true;

				LOG.info("Shutting down the job dispatcher");

				// in this shutdown code we copy the references to the stack first,
				// to avoid concurrent modification
				Throwable exception = null;

				JobManagerRunner[] runners = this.runners;
				if (runners != null) {
					this.runners = null;

					for (JobManagerRunner runner : runners) {
						try {
							runner.shutdown();
						} catch (Throwable e) {
							// collect all failures and keep shutting the others down
							exception = ExceptionUtils.firstOrSuppressed(e, exception);
						}
					}
				}

				// shut down the JobManagerServices
				try {
					jobManagerServices.shutdown();
				} catch (Throwable throwable) {
					exception = ExceptionUtils.firstOrSuppressed(throwable, exception);
				}

				if (exception != null) {
					throw new FlinkException("Could not properly terminate all JobManagerRunners.", exception);
				}
			}
		}
	}

	// ------------------------------------------------------------------------
	//  submitting jobs
	// ------------------------------------------------------------------------

	/**
	 * This method executes a job in detached mode. The method returns immediately after the job
	 * has been started (the JobManagerRunners have been launched); it does not wait for the
	 * job to complete. Cleanup of the running-job state happens in the {@link DetachedFinalizer}
	 * once all JobManagers report completion.
	 *
	 * @param job  The Flink job to execute
	 *
	 * @throws JobExecutionException Thrown if anything went amiss during initial job launch,
	 *         or if the job terminally failed.
	 */
	public void runDetached(JobGraph job) throws JobExecutionException {
		checkNotNull(job);

		LOG.info("Received job for detached execution: {} ({})", job.getName(), job.getJobID());

		synchronized (lock) {
			checkState(!shutdown, "mini cluster is shut down");
			checkState(runners == null, "mini cluster can only execute one job at a time");

			DetachedFinalizer finalizer = new DetachedFinalizer(job.getJobID(), numJobManagers);

			this.runners = startJobRunners(job, finalizer, finalizer);
		}
	}

	/**
	 * This method runs a job in blocking mode. The method returns only after the job
	 * completed successfully, or after it failed terminally.
	 *
	 * @param job  The Flink job to execute
	 * @return The result of the job execution
	 *
	 * @throws JobExecutionException Thrown if anything went amiss during initial job launch,
	 *         or if the job terminally failed.
	 * @throws InterruptedException Thrown if the calling thread is interrupted while waiting
	 *         for the job to finish.
	 */
	public JobExecutionResult runJobBlocking(JobGraph job) throws JobExecutionException, InterruptedException {
		checkNotNull(job);

		LOG.info("Received job for blocking execution: {} ({})", job.getName(), job.getJobID());
		final BlockingJobSync sync = new BlockingJobSync(job.getJobID(), numJobManagers);

		synchronized (lock) {
			checkState(!shutdown, "mini cluster is shut down");
			checkState(runners == null, "mini cluster can only execute one job at a time");

			this.runners = startJobRunners(job, sync, sync);
		}

		try {
			return sync.getResult();
		}
		finally {
			// always clear the status for the next job
			runners = null;
			clearJobRunningState(job.getJobID());
		}
	}

	/**
	 * Starts one {@link JobManagerRunner} per configured JobManager for the given job.
	 * If any runner fails to start, all runners started so far are shut down again and
	 * the job is unregistered from the high-availability services.
	 *
	 * @param job The job to start the JobManagers for
	 * @param onCompletion Callback invoked when the job finishes, fails, or is finished by
	 *                     another JobManager
	 * @param errorHandler Handler for fatal errors in the JobManagers
	 * @return The started runners, one per JobManager
	 *
	 * @throws JobExecutionException Thrown if any of the JobManagers could not be started.
	 */
	private JobManagerRunner[] startJobRunners(
			JobGraph job,
			OnCompletionActions onCompletion,
			FatalErrorHandler errorHandler) throws JobExecutionException {

		LOG.info("Starting {} JobMaster(s) for job {} ({})", numJobManagers, job.getName(), job.getJobID());

		// start all JobManagers
		JobManagerRunner[] runners = new JobManagerRunner[numJobManagers];
		for (int i = 0; i < numJobManagers; i++) {
			try {
				runners[i] = new JobManagerRunner(
					ResourceID.generate(),
					job,
					configuration,
					rpcServices[i],
					haServices,
					heartbeatServices,
					jobManagerServices,
					metricRegistry,
					onCompletion,
					errorHandler);
				runners[i].start();
			}
			catch (Throwable t) {
				// shut down all the runners started so far
				// (bug fix: this used to index with 'i' instead of 'k', leaking runners 0..i-1)
				for (int k = 0; k <= i; k++) {
					try {
						if (runners[k] != null) {
							runners[k].shutdown();
						}
					} catch (Throwable ignored) {
						// silent shutdown
					}
				}

				// un-register the job from the high-availability services
				try {
					haServices.getRunningJobsRegistry().setJobFinished(job.getJobID());
				}
				catch (Throwable tt) {
					LOG.warn("Could not properly unregister job from high-availability services", tt);
				}

				throw new JobExecutionException(job.getJobID(), "Could not start the JobManager(s) for the job", t);
			}
		}

		return runners;
	}

	/**
	 * Clears the per-job state kept in the high-availability services once a job has finished.
	 *
	 * @param jobID The ID of the finished job whose state should be cleared
	 */
	private void clearJobRunningState(JobID jobID) {
		// we mark the job as finished in the HA services, so need
		// to remove the data after job finished
		try {
			haServices.getRunningJobsRegistry().clearJob(jobID);
			// TODO: Remove job data from BlobServer
		}
		catch (Throwable t) {
			LOG.warn("Could not clear job {} at the status registry of the high-availability services", jobID, t);
		}
	}

	// ------------------------------------------------------------------------
	//  test methods to simulate job master failures
	// ------------------------------------------------------------------------

//	public void killJobMaster(int which) {
//		checkArgument(which >= 0 && which < numJobManagers, "no such job master");
//		checkState(!shutdown, "mini cluster is shut down");
//
//		JobManagerRunner[] runners = this.runners;
//		checkState(runners != null, "mini cluster it not executing a job right now");
//
//		runners[which].shutdown(new Throwable("kill JobManager"));
//	}

	// ------------------------------------------------------------------------
	//  utility classes
	// ------------------------------------------------------------------------

	/**
	 * Simple class that waits for all runners to have reported that they are done.
	 * In the case of a high-availability test setup, there may be multiple runners.
	 * After that, it marks the mini cluster as ready to receive new jobs.
	 */
	private class DetachedFinalizer implements OnCompletionActions, FatalErrorHandler {

		private final JobID jobID;

		/** Counts down from the number of JobManagers; cleanup happens when it reaches zero. */
		private final AtomicInteger numJobManagersToWaitFor;

		private DetachedFinalizer(JobID jobID, int numJobManagersToWaitFor) {
			this.jobID = jobID;
			this.numJobManagersToWaitFor = new AtomicInteger(numJobManagersToWaitFor);
		}

		@Override
		public void jobFinished(JobExecutionResult result) {
			decrementCheckAndCleanup();
		}

		@Override
		public void jobFailed(Throwable cause) {
			decrementCheckAndCleanup();
		}

		@Override
		public void jobFinishedByOther() {
			decrementCheckAndCleanup();
		}

		@Override
		public void onFatalError(Throwable exception) {
			decrementCheckAndCleanup();
		}

		/**
		 * Decrements the counter of outstanding JobManagers; when the last one reports,
		 * releases the dispatcher for the next job and clears the HA running-job state.
		 */
		private void decrementCheckAndCleanup() {
			if (numJobManagersToWaitFor.decrementAndGet() == 0) {
				MiniClusterJobDispatcher.this.runners = null;
				MiniClusterJobDispatcher.this.clearJobRunningState(jobID);
			}
		}
	}

	// ------------------------------------------------------------------------

	/**
	 * This class is used to sync on blocking jobs across multiple runners.
	 * Only after all runners reported back that they are finished, the
	 * result will be released.
	 *
	 * <p>That way it is guaranteed that after the blocking job submit call returns,
	 * the dispatcher is immediately free to accept another job.
	 */
	private static class BlockingJobSync implements OnCompletionActions, FatalErrorHandler {

		private final JobID jobId;

		private final CountDownLatch jobMastersToWaitFor;

		/** The cause of the job's terminal failure, if any; written by a JobManager thread. */
		private volatile Throwable jobException;

		/** The first fatal runner error, if any; the job may still have succeeded elsewhere. */
		private volatile Throwable runnerException;

		/** The successful result of the job, if any; written by a JobManager thread. */
		private volatile JobExecutionResult result;

		BlockingJobSync(JobID jobId, int numJobMastersToWaitFor) {
			this.jobId = jobId;
			this.jobMastersToWaitFor = new CountDownLatch(numJobMastersToWaitFor);
		}

		@Override
		public void jobFinished(JobExecutionResult jobResult) {
			this.result = jobResult;
			jobMastersToWaitFor.countDown();
		}

		@Override
		public void jobFailed(Throwable cause) {
			jobException = cause;
			jobMastersToWaitFor.countDown();
		}

		@Override
		public void jobFinishedByOther() {
			this.jobMastersToWaitFor.countDown();
		}

		@Override
		public void onFatalError(Throwable exception) {
			// keep only the first fatal error; do not count down, because another
			// JobManager may still take over and finish the job
			if (runnerException == null) {
				runnerException = exception;
			}
		}

		/**
		 * Blocks until all JobMasters have reported back, then returns the job result
		 * or throws the corresponding failure.
		 *
		 * @return The result of the successfully completed job
		 *
		 * @throws JobExecutionException Thrown if the job failed, or if all JobManagers
		 *         encountered fatal errors.
		 * @throws InterruptedException Thrown if the waiting thread is interrupted.
		 */
		public JobExecutionResult getResult() throws JobExecutionException, InterruptedException {
			jobMastersToWaitFor.await();

			final Throwable jobFailureCause = this.jobException;
			final Throwable runnerException = this.runnerException;
			final JobExecutionResult result = this.result;

			// (1) we check if the job terminated with an exception
			// (2) we check whether the job completed successfully
			// (3) we check if we have exceptions from the JobManagers. the job may still have
			//     completed successfully in that case, if multiple JobMasters were running
			//     and other took over. only if all encounter a fatal error, the job cannot finish

			if (jobFailureCause != null) {
				if (jobFailureCause instanceof JobExecutionException) {
					throw (JobExecutionException) jobFailureCause;
				}
				else {
					throw new JobExecutionException(jobId, "The job execution failed", jobFailureCause);
				}
			}
			else if (result != null) {
				return result;
			}
			else if (runnerException != null) {
				throw new JobExecutionException(jobId,
						"The job execution failed because all JobManagers encountered fatal errors", runnerException);
			}
			else {
				throw new IllegalStateException("Bug: Job finished with neither error nor result.");
			}
		}
	}
}