All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.dispatcher.StandaloneDispatcher Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.dispatcher;

import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.CoreOptions;
import org.apache.flink.runtime.blob.BlobServer;
import org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph;
import org.apache.flink.runtime.heartbeat.HeartbeatServices;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.jobmaster.JobMaster;
import org.apache.flink.runtime.messages.Acknowledge;
import org.apache.flink.runtime.metrics.groups.JobManagerMetricGroup;
import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway;
import org.apache.flink.runtime.rpc.FatalErrorHandler;
import org.apache.flink.runtime.rpc.LeaderShipLostHandler;
import org.apache.flink.runtime.rpc.RpcService;
import org.apache.flink.runtime.rpc.RpcUtils;

import javax.annotation.Nullable;

import java.util.Collection;
import java.util.concurrent.CompletableFuture;

/**
 * Dispatcher implementation which spawns a {@link JobMaster} for each
 * submitted {@link JobGraph} within the same process. This dispatcher
 * can be used as the default for all different session clusters.
 *
 * <p>If {@link CoreOptions#CLUSTER_IDLE_TIMEOUT} is configured with a non-negative value,
 * the dispatcher additionally tracks how long the cluster has been without any
 * non-globally-terminated job and completes its termination future once that idle
 * period has lasted for the configured timeout. A negative value disables this logic.
 */
public class StandaloneDispatcher extends Dispatcher {

	/**
	 * Wall-clock timestamp (milliseconds) at which the cluster was last observed to have no
	 * non-globally-terminated jobs, or {@code -1} if no idle period is currently tracked.
	 *
	 * <p>NOTE(review): this field is written from future callbacks and read by the scheduled
	 * idle check; presumably all of them run in the dispatcher's main thread -- confirm,
	 * otherwise the field needs to be {@code volatile}.
	 */
	private long clusterIdleStartTimestamp = -1;

	/** Idle duration after which the cluster is terminated; negative disables the feature. */
	private final Time clusterIdleTimeout;

	/**
	 * Creates the dispatcher and reads the idle timeout (in milliseconds) from the configuration.
	 *
	 * <p>All arguments are forwarded to the {@link Dispatcher} constructor; the submitted job
	 * graph store is obtained from the given high availability services.
	 *
	 * @throws Exception if the super class constructor fails
	 */
	public StandaloneDispatcher(
			RpcService rpcService,
			String endpointId,
			Configuration configuration,
			HighAvailabilityServices highAvailabilityServices,
			ResourceManagerGateway resourceManagerGateway,
			BlobServer blobServer,
			HeartbeatServices heartbeatServices,
			JobManagerMetricGroup jobManagerMetricGroup,
			@Nullable String metricQueryServicePath,
			ArchivedExecutionGraphStore archivedExecutionGraphStore,
			JobManagerRunnerFactory jobManagerRunnerFactory,
			FatalErrorHandler fatalErrorHandler,
			@Nullable String restAddress,
			HistoryServerArchivist historyServerArchivist,
			LeaderShipLostHandler leaderShipLostHandler) throws Exception {
		super(
			rpcService,
			endpointId,
			configuration,
			highAvailabilityServices,
			highAvailabilityServices.getSubmittedJobGraphStore(),
			resourceManagerGateway,
			blobServer,
			heartbeatServices,
			jobManagerMetricGroup,
			metricQueryServicePath,
			archivedExecutionGraphStore,
			jobManagerRunnerFactory,
			fatalErrorHandler,
			restAddress,
			historyServerArchivist,
			leaderShipLostHandler);
		this.clusterIdleTimeout = Time.milliseconds(configuration.getLong(CoreOptions.CLUSTER_IDLE_TIMEOUT));
		log.info("Init StandaloneDispatcher, clusterIdleTimeout: {}", clusterIdleTimeout);
	}

	/**
	 * Submits the job via the super class and, if the idle timeout is enabled, clears the
	 * tracked idle start timestamp once the submission has been acknowledged.
	 *
	 * @param jobGraph job to submit
	 * @param timeout RPC timeout passed on to the super class
	 * @return future of the submission acknowledgement
	 */
	@Override
	public CompletableFuture<Acknowledge> submitJob(JobGraph jobGraph, Time timeout) {
		final CompletableFuture<Acknowledge> submitJobFuture = super.submitJob(jobGraph, timeout);
		if (clusterIdleTimeout.toMilliseconds() < 0) {
			// Idle termination is disabled, nothing to reset.
			return submitJobFuture;
		}
		// Only a successfully acknowledged submission ends the idle period; a failed
		// submission skips this stage and leaves the timestamp untouched.
		return submitJobFuture.thenApply(acknowledge -> {
			clusterIdleStartTimestamp = -1;
			log.info("Reset the begin timestamp of delaying cluster termination to -1.");
			return acknowledge;
		});
	}

	/**
	 * After the regular handling of the super class, starts the idle period and schedules the
	 * delayed termination check if no non-globally-terminated job is left.
	 *
	 * @param archivedExecutionGraph archived graph of the job that reached a globally terminal state
	 */
	@Override
	protected void jobReachedGloballyTerminalState(ArchivedExecutionGraph archivedExecutionGraph) {
		super.jobReachedGloballyTerminalState(archivedExecutionGraph);
		if (clusterIdleTimeout.toMilliseconds() < 0) {
			return;
		}
		// check if all jobs are globally terminated.
		final CompletableFuture<Long> numNonGloballyTerminatedJobsFuture = getNumNonGloballyTerminatedJobsFuture();
		numNonGloballyTerminatedJobsFuture.thenAccept(num -> {
			log.info("Number of non-global-terminated jobs is {}.", num);
			if (num == 0) {
				clusterIdleStartTimestamp = System.currentTimeMillis();
				scheduleRunAsync(this::terminateIdleApp, clusterIdleTimeout);
				log.info("Delaying cluster termination after {} and set cluster-idle-start-timestamp: {}.",
					clusterIdleTimeout, clusterIdleStartTimestamp);
			}
		});
	}

	/**
	 * Scheduled idle check: completes the termination future if the idle period is still being
	 * tracked, has lasted at least the configured timeout, and there is still no
	 * non-globally-terminated job.
	 */
	private void terminateIdleApp() {
		log.info("Double check for delay termination, cluster-idle-start-timestamp: {}.", clusterIdleStartTimestamp);
		// A timestamp <= 0 means a job submission reset the idle period in the meantime; an
		// elapsed time below the timeout means a newer idle period (with its own scheduled
		// check) replaced the one this check was scheduled for.
		if (clusterIdleStartTimestamp > 0 &&
			System.currentTimeMillis() - clusterIdleStartTimestamp >= clusterIdleTimeout.toMilliseconds()) {
			final CompletableFuture<Long> numNonGloballyTerminatedJobsFuture = getNumNonGloballyTerminatedJobsFuture();
			numNonGloballyTerminatedJobsFuture.thenAccept(num -> {
				log.info("Number of non-global-terminated jobs in double check is {}.", num);
				if (num == 0) {
					log.info("Start terminating this cluster.");
					getTerminationFuture().complete(null);
				}
			});
		}
	}

	/**
	 * Counts the non-archived jobs whose status is not a globally terminal state.
	 *
	 * @return future with the number of such jobs
	 */
	private CompletableFuture<Long> getNumNonGloballyTerminatedJobsFuture() {
		final CompletableFuture<Collection<JobStatus>> nonArchivedJobsFuture =
			getNonArchivedJobsFuture(RpcUtils.INF_TIMEOUT);
		return nonArchivedJobsFuture.thenApply(nonArchivedJobs -> nonArchivedJobs.stream()
			.filter(jobStatus -> !jobStatus.isGloballyTerminalState()).count());
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy