All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.instance.InstanceManager Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.instance;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import akka.actor.ActorRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Simple manager that keeps track of which TaskManager are available and alive.
 */
public class InstanceManager {

	private static final Logger LOG = LoggerFactory.getLogger(InstanceManager.class);

	// ------------------------------------------------------------------------
	// Fields
	// ------------------------------------------------------------------------

	/** Global lock */
	private final Object lock = new Object();

	/** Set of hosts known to run a task manager that are thus able to execute tasks (by ID). */
	private final Map registeredHostsById;

	/** Set of hosts known to run a task manager that are thus able to execute tasks (by connection). */
	private final Map registeredHostsByConnection;

	/** Set of hosts that were present once and have died */
	private final Set deadHosts;

	/** Listeners that want to be notified about availability and disappearance of instances */
	private final List instanceListeners = new ArrayList();

	/** The total number of task slots that the system has */
	private int totalNumberOfAliveTaskSlots;

	/** Flag marking the system as shut down */
	private volatile boolean isShutdown;

	// ------------------------------------------------------------------------
	// Constructor and set-up
	// ------------------------------------------------------------------------

	/**
	 * Creates an new instance manager.
	 */
	public InstanceManager() {
		this.registeredHostsById = new HashMap();
		this.registeredHostsByConnection = new HashMap();
		this.deadHosts = new HashSet();
	}

	public void shutdown() {
		synchronized (this.lock) {
			if (this.isShutdown) {
				return;
			}
			this.isShutdown = true;

			for (Instance i : this.registeredHostsById.values()) {
				i.markDead();
			}

			this.registeredHostsById.clear();
			this.registeredHostsByConnection.clear();
			this.deadHosts.clear();
			this.totalNumberOfAliveTaskSlots = 0;
		}
	}

	public boolean reportHeartBeat(InstanceID instanceId, byte[] lastMetricsReport) {
		if (instanceId == null) {
			throw new IllegalArgumentException("InstanceID may not be null.");
		}

		synchronized (this.lock) {
			if (this.isShutdown) {
				return false;
			}

			Instance host = registeredHostsById.get(instanceId);

			if (host == null){
				if (LOG.isDebugEnabled()) {
					LOG.debug("Received heartbeat from unknown TaskManager with instance ID " + instanceId.toString() +
							" Possibly TaskManager was marked as dead (timed-out) earlier. " +
							"Reporting back that task manager is no longer known.");
				}
				return false;
			}

			host.reportHeartBeat();
			host.setMetricsReport(lastMetricsReport);

			if (LOG.isDebugEnabled()) {
				LOG.debug("Received heartbeat from TaskManager " + host);
			}

			return true;
		}
	}

	public InstanceID registerTaskManager(ActorRef taskManager, InstanceConnectionInfo connectionInfo, HardwareDescription resources, int numberOfSlots){
		synchronized(this.lock){
			if (this.isShutdown) {
				throw new IllegalStateException("InstanceManager is shut down.");
			}

			Instance prior = registeredHostsByConnection.get(taskManager);
			if (prior != null) {
				LOG.info("Registration attempt from TaskManager at " + taskManager.path() +
						". This connection is already registered under ID " + prior.getId());
				return null;
			}

			boolean wasDead = this.deadHosts.remove(taskManager);
			if (wasDead) {
				LOG.info("Registering TaskManager at " + taskManager.path() +
						" which was marked as dead earlier because of a heart-beat timeout.");
			}

			InstanceID id;
			do {
				id = new InstanceID();
			} while (registeredHostsById.containsKey(id));


			Instance host = new Instance(taskManager, connectionInfo, id, resources, numberOfSlots);

			registeredHostsById.put(id, host);
			registeredHostsByConnection.put(taskManager, host);

			totalNumberOfAliveTaskSlots += numberOfSlots;

			if (LOG.isInfoEnabled()) {
				LOG.info(String.format("Registered TaskManager at %s (%s) as %s. Current number of registered hosts is %d.",
						connectionInfo.getHostname(), taskManager.path(), id, registeredHostsById.size()));
			}

			host.reportHeartBeat();

			// notify all listeners (for example the scheduler)
			notifyNewInstance(host);

			return id;
		}
	}

	public void unregisterTaskManager(ActorRef taskManager){
		Instance host = registeredHostsByConnection.get(taskManager);

		if(host != null){
			registeredHostsByConnection.remove(taskManager);
			registeredHostsById.remove(host.getId());
			deadHosts.add(taskManager);

			host.markDead();

			totalNumberOfAliveTaskSlots -= host.getTotalNumberOfSlots();

			notifyDeadInstance(host);

			LOG.info("Unregistered task manager " + taskManager.path().address() + ". Number of " +
					"registered task managers " + getNumberOfRegisteredTaskManagers() + ". Number" +
					" of available slots " + getTotalNumberOfSlots() + ".");
		}
	}

	public boolean isRegistered(ActorRef taskManager) {
		return registeredHostsByConnection.containsKey(taskManager);
	}

	public int getNumberOfRegisteredTaskManagers() {
		return this.registeredHostsById.size();
	}

	public int getTotalNumberOfSlots() {
		return this.totalNumberOfAliveTaskSlots;
	}

	public Collection getAllRegisteredInstances() {
		synchronized (this.lock) {
			// return a copy (rather than a Collections.unmodifiable(...) wrapper), such that
			// concurrent modifications do not interfere with the traversals or lookups
			return new HashSet(registeredHostsById.values());
		}
	}

	public Instance getRegisteredInstanceById(InstanceID instanceID) {
		return registeredHostsById.get(instanceID);
	}

	public Instance getRegisteredInstance(ActorRef ref) {
		return registeredHostsByConnection.get(ref);
	}

	// --------------------------------------------------------------------------------------------

	public void addInstanceListener(InstanceListener listener) {
		synchronized (this.instanceListeners) {
			this.instanceListeners.add(listener);
		}
	}

	public void removeInstanceListener(InstanceListener listener) {
		synchronized (this.instanceListeners) {
			this.instanceListeners.remove(listener);
		}
	}

	private void notifyNewInstance(Instance instance) {
		synchronized (this.instanceListeners) {
			for (InstanceListener listener : this.instanceListeners) {
				try {
					listener.newInstanceAvailable(instance);
				}
				catch (Throwable t) {
					LOG.error("Notification of new instance availability failed.", t);
				}
			}
		}
	}

	private void notifyDeadInstance(Instance instance) {
		synchronized (this.instanceListeners) {
			for (InstanceListener listener : this.instanceListeners) {
				try {
					listener.instanceDied(instance);
				} catch (Throwable t) {
					LOG.error("Notification of dead instance failed.", t);
				}
			}
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy