// org.apache.flink.runtime.resourcemanager.autoscale.plugins.detectors.ClusterCpuUsageDetector Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.resourcemanager.autoscale.plugins.detectors;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.metrics.Histogram;
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
import org.apache.flink.runtime.metrics.SimpleHistogram;
import org.apache.flink.runtime.resourcemanager.autoscale.ResourceAutoScaler;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.ResourceDetector;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.ResourceSymptom;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterHighCpu;
import org.apache.flink.runtime.resourcemanager.autoscale.plugins.symptoms.ClusterLowCpu;
import org.apache.flink.runtime.resourcemanager.autoscale.utils.SlotManagerInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
/**
* ClusterCpuUsageDetector detects cluster cpu usage.
* Detects {@link ClusterHighCpu} if the max avg cpu usage of cluster
* is higher than threshold.
* Detects {@link ClusterLowCpu} if the max avg cpu usage of cluster
* is lower than threshold.
*/
public class ClusterCpuUsageDetector implements ResourceDetector {
private static final Logger LOGGER = LoggerFactory.getLogger(ClusterCpuUsageDetector.class);
public static final ConfigOption HIGH_CPU_THRESHOLD =
ConfigOptions.key("session-auto-scale.cluster-cpu-usage-detector.threashold.high").defaultValue(1.0);
public static final ConfigOption LOW_CPU_THRESHOLD =
ConfigOptions.key("session-auto-scale.cluster-cpu-usage-detector.threashold.low").defaultValue(0.0);
private ResourceAutoScaler resourceAutoScaler;
private int checkInterval;
private double highThreshold;
private double lowThreshold;
private Histogram cpuAvailableHistogram;
private Histogram cpuCapacityHistogram;
@Override
public void open(ResourceAutoScaler resourceAutoScaler) {
this.resourceAutoScaler = resourceAutoScaler;
checkInterval = resourceAutoScaler.getConfig().getInteger(ResourceAutoScaler.METRIC_UPDATE_INTERVAL);
highThreshold = resourceAutoScaler.getConfig().getDouble(HIGH_CPU_THRESHOLD);
lowThreshold = resourceAutoScaler.getConfig().getDouble(LOW_CPU_THRESHOLD);
cpuAvailableHistogram = new SimpleHistogram(checkInterval);
cpuCapacityHistogram = new SimpleHistogram(checkInterval);
}
@Override
public void close() {
}
@Override
public void update(SlotManagerInfo slotManagerInfo, Collection> taskManagersProfiles) {
long cpuCapacity = 0;
long cpuAvailable = 0;
for (Tuple2 resourceProfileTuple : taskManagersProfiles) {
cpuCapacity += resourceProfileTuple.f0.getCpuCores() * 1000L;
cpuAvailable += resourceProfileTuple.f1.getCpuCores() * 1000L;
}
cpuCapacityHistogram.update(cpuCapacity);
cpuAvailableHistogram.update(cpuAvailable);
}
@Override
public ResourceSymptom detect() throws Exception {
LOGGER.debug("Start detecting");
double capacity = cpuCapacityHistogram.getStatistics().getMean();
double available = cpuAvailableHistogram.getStatistics().getMean();
LOGGER.debug("Cluster cpu capacity {}, available {}.", capacity, available);
if (capacity == 0.0) {
LOGGER.warn("Cluster has non cpu resource, capacity is 0.");
return null;
}
double utility = (capacity - available) / capacity;
if (utility >= highThreshold) {
LOGGER.debug("Cpu high detected for Cluster, capacity {}, available {}, utility {}", capacity, available, utility);
return new ClusterHighCpu(available, capacity, utility);
} else if (utility <= lowThreshold) {
LOGGER.debug("Cpu low detected for Cluster, capacity {}, available {}, utility {}", capacity, available, utility);
return new ClusterLowCpu(available, capacity, utility);
}
return null;
}
}