All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.resourcemanager.autoscale.plugins.calculators.ExponentiallyClusterScaler Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.resourcemanager.autoscale.plugins.calculators;

import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.runtime.resourcemanager.autoscale.ResourceAutoScaler;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.ResourceCalculator;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.ResourceSymptom;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterHighBlockRequests;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterHighCpu;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterHighMemory;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterLongUnfulfilledQueue;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterLowCpu;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterLowMemory;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterShortUnfulfilledQueue;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterZeroBlockRequests;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

/**
 * Resource calculator that derives a target TaskManager (TM) count for the cluster from the
 * symptoms diagnosed in the current round.
 *
 * <p>Scale-out is exponential: every consecutive scale-out round adds the current step to the TM
 * count and then doubles the step (capped at the configured maximum TM number). Scale-in is
 * linear: one TM is removed per round. The step is reset to its configured initial value in any
 * round that does not scale out.
 *
 * <p>The instance keeps mutable per-round state (the current step and the last diagnosed
 * symptoms) and performs no synchronization of its own.
 */
public class ExponentiallyClusterScaler implements ResourceCalculator {

	private static final Logger LOGGER = LoggerFactory.getLogger(ExponentiallyClusterScaler.class);

	/** Number of TMs added by the first scale-out of a streak; doubled on each following one. */
	public static final ConfigOption<Integer> EXPONENTIAL_SCALE_INIT_STEP =
		ConfigOptions.key("session-auto-scale.exponential-scale.init-step").defaultValue(2);

	/** Auto scaler this calculator was opened with; source of all configuration values. */
	private ResourceAutoScaler resourceAutoScaler;

	/** Number of TMs the next scale-out will add. */
	private int nowScaleOutStep;

	// Symptoms found by the latest diagnose() call; null means "not present in that round".
	private ClusterHighCpu clusterHighCpu;
	private ClusterLowCpu clusterLowCpu;
	private ClusterHighMemory clusterHighMemory;
	private ClusterLowMemory clusterLowMemory;
	private ClusterLongUnfulfilledQueue clusterLongUnfulfilledQueue;
	private ClusterShortUnfulfilledQueue clusterShortUnfulfilledQueue;
	private ClusterHighBlockRequests clusterHighBlockRequests;
	private ClusterZeroBlockRequests clusterZeroBlockRequests;

	/**
	 * Binds this calculator to the given auto scaler and initializes the scale-out step from
	 * {@link #EXPONENTIAL_SCALE_INIT_STEP}.
	 *
	 * @param resourceAutoScaler the auto scaler whose configuration is used by this calculator
	 */
	@Override
	public void open(ResourceAutoScaler resourceAutoScaler) {
		this.resourceAutoScaler = resourceAutoScaler;
		nowScaleOutStep = resourceAutoScaler.getConfig().getInteger(EXPONENTIAL_SCALE_INIT_STEP);
	}

	/** Nothing to release. */
	@Override
	public void close() {

	}

	/**
	 * Calculates the target number of TMs for the cluster.
	 *
	 * <ul>
	 *   <li>Any of high memory, high cpu, long unfulfilled queue or high block requests triggers
	 *       a scale-out by the current step, after which the step is doubled.</li>
	 *   <li>Otherwise the step is reset, and if zero block requests, short unfulfilled queue, low
	 *       memory and low cpu are all present, the cluster is scaled in by one TM.</li>
	 *   <li>Otherwise the current TM number is kept.</li>
	 * </ul>
	 *
	 * <p>In every case the result is bounded by the configured cluster min/max TM limits.
	 *
	 * @param symptomList symptoms detected in the current round
	 * @param currentTaskManagers current number of TMs
	 * @return the target number of TMs
	 */
	@Override
	public int calculate(List<ResourceSymptom> symptomList, int currentTaskManagers) {
		diagnose(symptomList);

		if (clusterHighMemory != null || clusterHighCpu != null ||
			clusterLongUnfulfilledQueue != null || clusterHighBlockRequests != null) {
			LOGGER.info("Detect event {} {} {} {}, trigger scale-out with current tm {} and scale step {}.",
				clusterHighCpu, clusterHighMemory, clusterHighBlockRequests, clusterLongUnfulfilledQueue, currentTaskManagers, nowScaleOutStep);

			// Widen to long so that neither the sum nor the doubling can overflow before being capped.
			long scaledOut = (long) currentTaskManagers + nowScaleOutStep;

			// Exponentially update the scale out step, never letting it exceed the max TM number.
			int maxTaskManagers =
				resourceAutoScaler.getConfig().getInteger(ResourceAutoScaler.RESOURCE_LIMIT_CLUSTER_MAX_TM);
			nowScaleOutStep = (int) Math.min((long) nowScaleOutStep << 1, (long) maxTaskManagers);

			return clampToClusterLimits(scaledOut);
		}

		// If not trigger scale-out, we need to reset the scale-out step to the init state.
		nowScaleOutStep = resourceAutoScaler.getConfig().getInteger(EXPONENTIAL_SCALE_INIT_STEP);

		if (clusterZeroBlockRequests != null && clusterShortUnfulfilledQueue != null &&
			clusterLowMemory != null && clusterLowCpu != null) {
			LOGGER.info("Detect event {} {} {} {}, trigger scale-in with current tm {} and scale step {}.",
				clusterZeroBlockRequests, clusterShortUnfulfilledQueue, clusterLowMemory, clusterLowCpu, currentTaskManagers, 1);
			return clampToClusterLimits((long) currentTaskManagers - 1);
		}

		return clampToClusterLimits(currentTaskManagers);
	}

	/**
	 * Bounds a candidate TM number by the configured cluster limits.
	 *
	 * <p>The max limit is applied first and the min limit second, so the min limit wins if the two
	 * are configured inconsistently.
	 *
	 * @param candidate the unbounded target TM number
	 * @return the candidate restricted to the configured min/max TM numbers
	 */
	private int clampToClusterLimits(long candidate) {
		int maxTaskManagers =
			resourceAutoScaler.getConfig().getInteger(ResourceAutoScaler.RESOURCE_LIMIT_CLUSTER_MAX_TM);
		int minTaskManagers =
			resourceAutoScaler.getConfig().getInteger(ResourceAutoScaler.RESOURCE_LIMIT_CLUSTER_MIN_TM);
		// The result is either minTaskManagers or a value no larger than maxTaskManagers, so it fits an int.
		return (int) Math.max(Math.min(candidate, (long) maxTaskManagers), (long) minTaskManagers);
	}

	/**
	 * Clears the symptoms remembered from the previous round and records those present in the
	 * given list. If a symptom type occurs more than once, the last occurrence is kept. Symptoms
	 * of other types are ignored.
	 *
	 * @param symptomList symptoms detected in the current round
	 * @return always {@code true}
	 */
	public boolean diagnose(List<ResourceSymptom> symptomList) {
		clusterHighCpu = null;
		clusterLowCpu = null;
		clusterHighMemory = null;
		clusterLowMemory = null;
		clusterLongUnfulfilledQueue = null;
		clusterShortUnfulfilledQueue = null;
		clusterHighBlockRequests = null;
		clusterZeroBlockRequests = null;

		for (ResourceSymptom symptom : symptomList) {
			if (symptom instanceof ClusterHighMemory) {
				clusterHighMemory = (ClusterHighMemory) symptom;
				LOGGER.debug("Cluster high memory detected.");
			} else if (symptom instanceof ClusterLowMemory) {
				clusterLowMemory = (ClusterLowMemory) symptom;
				LOGGER.debug("Cluster low memory detected.");
			} else if (symptom instanceof ClusterHighCpu) {
				clusterHighCpu = (ClusterHighCpu) symptom;
				LOGGER.debug("Cluster high cpu detected.");
			} else if (symptom instanceof ClusterLowCpu) {
				clusterLowCpu = (ClusterLowCpu) symptom;
				LOGGER.debug("Cluster low cpu detected.");
			} else if (symptom instanceof ClusterZeroBlockRequests) {
				clusterZeroBlockRequests = (ClusterZeroBlockRequests) symptom;
				LOGGER.debug("Cluster zero block request detected.");
			} else if (symptom instanceof ClusterHighBlockRequests) {
				clusterHighBlockRequests = (ClusterHighBlockRequests) symptom;
				LOGGER.debug("Cluster high block request detected.");
			} else if (symptom instanceof ClusterLongUnfulfilledQueue) {
				clusterLongUnfulfilledQueue = (ClusterLongUnfulfilledQueue) symptom;
				LOGGER.debug("Cluster long unfulfilled queue detected.");
			} else if (symptom instanceof ClusterShortUnfulfilledQueue) {
				clusterShortUnfulfilledQueue = (ClusterShortUnfulfilledQueue) symptom;
				LOGGER.debug("Cluster short unfulfilled queue detected.");
			}
		}

		return true;
	}
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy