All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.resourcemanager.autoscale.plugins.calculators.TargetTrackerScaler Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.resourcemanager.autoscale.plugins.calculators;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.runtime.resourcemanager.autoscale.ResourceAutoScaler;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.ResourceCalculator;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.ResourceSymptom;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.detectors.ClusterCpuUsageDetector;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.detectors.ClusterMemoryUsageDetector;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.detectors.ClusterUnfulfilledQueueDetector;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterHighBlockRequests;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterHighCpu;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterHighMemory;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterLongUnfulfilledQueue;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterLowCpu;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterLowMemory;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

/**
 * Target-tracking scaler that resolves cluster resource auto-scaling.
 *
 * <p>It scales the cluster out/in to an estimated TaskManager (TM) number when the tracked metrics
 * are above/below the expected thresholds, e.g. when a tracked utility grows above its high threshold,
 * it estimates the TM number needed to bring that utility back down to the high threshold.
 * The estimated number is also restricted by {@link ResourceAutoScaler#RESOURCE_LIMIT_CLUSTER_MAX_TM}
 * and {@link ResourceAutoScaler#RESOURCE_LIMIT_CLUSTER_MIN_TM}.
 *
 * <p>It currently does not derive an estimate from the block request number metrics; a detected
 * {@link ClusterHighBlockRequests} symptom only prevents scale-in.
 */
public class TargetTrackerScaler implements ResourceCalculator {

	private static final Logger LOGGER = LoggerFactory.getLogger(TargetTrackerScaler.class);

	private ResourceAutoScaler resourceAutoScaler;
	private Configuration configuration;

	// Symptoms found by the most recent diagnose() call; null when the symptom was not reported.
	private ClusterHighCpu clusterHighCpu;
	private ClusterLowCpu clusterLowCpu;
	private ClusterHighMemory clusterHighMemory;
	private ClusterLowMemory clusterLowMemory;
	private ClusterLongUnfulfilledQueue clusterLongUnfulfilledQueue;
	private ClusterHighBlockRequests clusterHighBlockRequests;

	// Thresholds and slot count read from the configuration in open().
	private double cpuHighThreshold;
	private double cpuLowThreshold;
	private double memoryHighThreshold;
	private double memoryLowThreshold;
	private int unfulfilledQueueHighThreshold;
	private int taskManagerSlots;

	/**
	 * Stores the auto scaler and caches the thresholds and the per-TM slot count from its configuration.
	 *
	 * @param resourceAutoScaler the auto scaler this calculator is attached to
	 */
	@Override
	public void open(ResourceAutoScaler resourceAutoScaler) {
		this.resourceAutoScaler = resourceAutoScaler;
		this.configuration = resourceAutoScaler.getConfig();

		this.cpuHighThreshold = configuration.getDouble(ClusterCpuUsageDetector.HIGH_CPU_THRESHOLD);
		this.cpuLowThreshold = configuration.getDouble(ClusterCpuUsageDetector.LOW_CPU_THRESHOLD);
		this.memoryHighThreshold = configuration.getDouble(ClusterMemoryUsageDetector.HIGH_MEM_THRESHOLD);
		this.memoryLowThreshold = configuration.getDouble(ClusterMemoryUsageDetector.LOW_MEM_THRESHOLD);
		this.unfulfilledQueueHighThreshold = configuration.getInteger(ClusterUnfulfilledQueueDetector.HIGH_UNFULFILLED_QUEUE_LENGTH_THRESHOLD);
		this.taskManagerSlots = configuration.getInteger(TaskManagerOptions.NUM_TASK_SLOTS);
	}

	/** Nothing to release. */
	@Override
	public void close() {

	}

	/**
	 * Calculates the target TM number from the given symptoms.
	 *
	 * <p>Scale-out is triggered by any of high CPU, high memory or a long unfulfilled queue.
	 * Scale-in is triggered only when both low CPU and low memory are reported and no high block
	 * request symptom is present. Otherwise the current number is kept. The result is finally
	 * clamped to the configured cluster maximum and minimum.
	 *
	 * @param symptomList the symptoms detected in the current round
	 * @param currentTaskManagers the current number of TMs
	 * @return the target number of TMs
	 */
	@Override
	public int calculate(List<ResourceSymptom> symptomList, int currentTaskManagers) {
		diagnose(symptomList);

		int targetTaskManagers = currentTaskManagers;

		if (clusterHighMemory != null || clusterHighCpu != null || clusterLongUnfulfilledQueue != null) {
			LOGGER.info("Detect event {} {} {}, trigger scale-out with current tm {}.",
				clusterHighCpu, clusterHighMemory, clusterLongUnfulfilledQueue, currentTaskManagers);

			// The target TM number should be the maximum of all estimated number.
			if (clusterHighCpu != null) {
				targetTaskManagers = Math.max(targetTaskManagers,
					estimateByUtility(currentTaskManagers, clusterHighCpu.getUtility(), cpuHighThreshold));
			}

			if (clusterHighMemory != null) {
				targetTaskManagers = Math.max(targetTaskManagers,
					estimateByUtility(currentTaskManagers, clusterHighMemory.getUtility(), memoryHighThreshold));
			}

			if (clusterLongUnfulfilledQueue != null) {
				// Guard against a non-positive slot configuration so the division below is always defined.
				int slotsPerTaskManager = Math.max(1, taskManagerSlots);
				// Divide in floating point: an integer division would truncate before Math.ceil
				// is applied and under-estimate the TMs needed for the excess requests.
				double excessRequests = clusterLongUnfulfilledQueue.getLength() - unfulfilledQueueHighThreshold;
				int additionalTaskManagers = (int) Math.ceil(excessRequests / slotsPerTaskManager);
				targetTaskManagers = Math.max(targetTaskManagers, currentTaskManagers + additionalTaskManagers);
			}
		} else if (clusterHighBlockRequests == null && (clusterLowMemory != null && clusterLowCpu != null)) {
			// We trigger scale in when both cpu and memory lower than threshold.
			LOGGER.info("Detect event {} {}, trigger scale-down with current tm {}.",
				clusterLowMemory, clusterLowCpu, currentTaskManagers);

			// Take the larger of the two estimates so that neither metric ends above its low threshold.
			targetTaskManagers = Math.max(
				estimateByUtility(currentTaskManagers, clusterLowMemory.getUtility(), memoryLowThreshold),
				estimateByUtility(currentTaskManagers, clusterLowCpu.getUtility(), cpuLowThreshold));
		}

		targetTaskManagers = Math.min(targetTaskManagers,
				resourceAutoScaler.getConfig().getInteger(ResourceAutoScaler.RESOURCE_LIMIT_CLUSTER_MAX_TM));
		targetTaskManagers = Math.max(targetTaskManagers,
				resourceAutoScaler.getConfig().getInteger(ResourceAutoScaler.RESOURCE_LIMIT_CLUSTER_MIN_TM));
		return targetTaskManagers;
	}

	/**
	 * Estimates the TM number at which the given utility would sit at the given threshold,
	 * i.e. {@code ceil(currentTaskManagers * utility / threshold)}.
	 */
	private static int estimateByUtility(int currentTaskManagers, double utility, double threshold) {
		return (int) Math.ceil(currentTaskManagers * utility / threshold);
	}

	/**
	 * Resets all remembered symptoms and records those found in the given list.
	 * If a symptom type occurs more than once, the last occurrence wins.
	 *
	 * @param symptomList the symptoms detected in the current round
	 * @return always {@code true}
	 */
	public boolean diagnose(List<ResourceSymptom> symptomList) {
		clusterHighCpu = null;
		clusterLowCpu = null;
		clusterHighMemory = null;
		clusterLowMemory = null;
		clusterLongUnfulfilledQueue = null;
		clusterHighBlockRequests = null;

		for (ResourceSymptom symptom : symptomList) {
			if (symptom instanceof ClusterHighMemory) {
				clusterHighMemory = (ClusterHighMemory) symptom;
				LOGGER.debug("Cluster high memory detected.");
				continue;
			}

			if (symptom instanceof ClusterLowMemory) {
				clusterLowMemory = (ClusterLowMemory) symptom;
				LOGGER.debug("Cluster low memory detected.");
				continue;
			}

			if (symptom instanceof ClusterHighCpu) {
				clusterHighCpu = (ClusterHighCpu) symptom;
				LOGGER.debug("Cluster high cpu detected.");
				continue;
			}

			if (symptom instanceof ClusterLowCpu) {
				clusterLowCpu = (ClusterLowCpu) symptom;
				LOGGER.debug("Cluster low cpu detected.");
				continue;
			}

			if (symptom instanceof ClusterHighBlockRequests) {
				clusterHighBlockRequests = (ClusterHighBlockRequests) symptom;
				LOGGER.debug("Cluster high block request detected.");
				continue;
			}

			if (symptom instanceof ClusterLongUnfulfilledQueue) {
				clusterLongUnfulfilledQueue = (ClusterLongUnfulfilledQueue) symptom;
				LOGGER.debug("Cluster long unfulfilled queue detected.");
			}
		}

		return true;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy