// org.apache.flink.runtime.healthmanager.plugins.resolvers.ParallelismResolverUtils Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.healthmanager.plugins.resolvers;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.healthmanager.plugins.utils.HealthMonitorOptions;
import org.apache.flink.runtime.healthmanager.plugins.utils.JobTopologyAnalyzer;
import org.apache.flink.runtime.healthmanager.plugins.utils.TaskMetrics;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import java.util.HashMap;
import java.util.Map;
/**
 * Utils to estimate current workload.
 *
 * <p>The workload of a vertex is its measured workload multiplied by a ratio computed for the
 * sub-DAG the vertex belongs to (see {@link #estimateSubDagWorkloadRatio(Map)}).
 *
 * <p>When {@link HealthMonitorOptions#PARALLELISM_SCALE_BY_MAX_WORKLOAD} is enabled, an instance
 * remembers the largest workload it has estimated for every vertex and never reports a value
 * below it. That state lives in a plain {@link HashMap} and no synchronization is performed here.
 */
public class ParallelismResolverUtils {

    /** Ratio used when no ratio could be derived for a sub-DAG; leaves the workload unscaled. */
    private static final double NEUTRAL_RATIO = 1.0;

    private final JobTopologyAnalyzer jobTopologyAnalyzer;

    private final boolean scaleByMaxWorkload;

    /** Largest workload estimated so far per vertex; only maintained if scaleByMaxWorkload is set. */
    private final Map<JobVertexID, Double> vertexMaxWorkload = new HashMap<>();

    /**
     * Creates the utility.
     *
     * @param jobTopologyAnalyzer topology helper used to look up sub-DAG roots, sources and inputs
     * @param config configuration from which
     *     {@link HealthMonitorOptions#PARALLELISM_SCALE_BY_MAX_WORKLOAD} is read
     */
    public ParallelismResolverUtils(JobTopologyAnalyzer jobTopologyAnalyzer, Configuration config) {
        this.jobTopologyAnalyzer = jobTopologyAnalyzer;
        this.scaleByMaxWorkload = config.getBoolean(HealthMonitorOptions.PARALLELISM_SCALE_BY_MAX_WORKLOAD);
    }

    /**
     * Estimates the workload of every vertex present in {@code taskMetrics}.
     *
     * <p>Each vertex's measured workload is multiplied by the ratio of its sub-DAG root. If no
     * ratio is available for that root, the workload is left unscaled. When scaling by max
     * workload is enabled, each value is additionally raised to the largest value previously
     * returned for that vertex by this instance, and the stored maximum is updated.
     *
     * @param taskMetrics metrics per vertex
     * @return a new map from vertex id to estimated workload, with one entry per key of
     *     {@code taskMetrics}
     */
    public Map<JobVertexID, Double> estimateVertexWorkload(Map<JobVertexID, TaskMetrics> taskMetrics) {
        Map<JobVertexID, Double> result = new HashMap<>();
        Map<JobVertexID, Double> subDagRatio = estimateSubDagWorkloadRatio(taskMetrics);

        for (Map.Entry<JobVertexID, TaskMetrics> entry : taskMetrics.entrySet()) {
            JobVertexID vertexID = entry.getKey();
            // A root without a ratio (e.g. its source had no metrics) must not cause an
            // unboxing NullPointerException; fall back to "no scaling".
            double ratio = subDagRatio.getOrDefault(
                    jobTopologyAnalyzer.getSubDagRoot(vertexID), NEUTRAL_RATIO);
            result.put(vertexID, entry.getValue().getWorkload() * ratio);
        }

        if (scaleByMaxWorkload) {
            for (Map.Entry<JobVertexID, Double> entry : result.entrySet()) {
                JobVertexID vertexID = entry.getKey();
                double previousMax = vertexMaxWorkload.getOrDefault(vertexID, 0.0);
                // Kept as an explicit comparison (not Math.max) so that NaN values are
                // handled exactly as before: a NaN estimate replaces the stored maximum.
                if (entry.getValue() < previousMax) {
                    entry.setValue(previousMax);
                } else {
                    vertexMaxWorkload.put(vertexID, entry.getValue());
                }
            }
        }
        return result;
    }

    /**
     * Estimates a workload ratio for every sub-DAG root.
     *
     * <p>For each sub-DAG root that is a source and has metrics, the ratio starts at 1. If the
     * delay increasing rate {@code r} lies strictly between 0 and 1 the ratio becomes
     * {@code 1 / (1 - r)}. For a parallel source with positive workload the ratio is then lowered
     * to {@code maxTps / inputTps} when that quotient is smaller, where {@code maxTps} is
     * {@code partitionCount / max(partitionLatency, taskLatencyPerRecord - waitOutputPerRecord)}.
     * Source roots without an entry in {@code taskMetrics} are skipped.
     *
     * <p>Afterwards the ratios are propagated downstream: every root takes the largest ratio
     * among the sub-DAG roots of its inputs, repeated until nothing changes.
     *
     * @param taskMetrics metrics per vertex
     * @return a new map from sub-DAG root to ratio; roots for which no ratio could be derived
     *     are absent
     */
    public Map<JobVertexID, Double> estimateSubDagWorkloadRatio(
            Map<JobVertexID, TaskMetrics> taskMetrics) {
        Map<JobVertexID, Double> subDagRatio = new HashMap<>();

        // estimate all source sub dag workload
        for (JobVertexID vertexId : jobTopologyAnalyzer.getAllSubDagRoots()) {
            if (!jobTopologyAnalyzer.isSource(vertexId)) {
                continue;
            }

            TaskMetrics metric = taskMetrics.get(vertexId);
            if (metric == null) {
                // No metrics for this source: nothing to derive a ratio from.
                continue;
            }

            double ratio = 1;
            double delayIncreasingRate = metric.getDelayIncreasingRate();
            if (delayIncreasingRate > 0 && delayIncreasingRate < 1) {
                ratio = 1 / (1 - delayIncreasingRate);
            }

            if (metric.isParallelSource() && metric.getWorkload() > 0) {
                double maxTps = 1.0 / Math.max(
                        metric.getPartitionLatency(),
                        metric.getTaskLatencyPerRecord() - metric.getWaitOutputPerRecord())
                        * metric.getPartitionCount();
                double tpsRatio = maxTps / metric.getInputTps();
                // Explicit comparison rather than Math.min: a NaN or infinite quotient
                // (zero latency or zero input tps) must leave the ratio untouched.
                if (tpsRatio < ratio) {
                    ratio = tpsRatio;
                }
            }
            subDagRatio.put(vertexId, ratio);
        }

        // scale up downstream sub dags according to upstream sub dags
        boolean checkAgain = true;
        while (checkAgain) {
            checkAgain = false;
            for (JobVertexID root : jobTopologyAnalyzer.getAllSubDagRoots()) {
                for (JobVertexID upStream : jobTopologyAnalyzer.getInputs(root)) {
                    Double upStreamRatio = subDagRatio.get(jobTopologyAnalyzer.getSubDagRoot(upStream));
                    if (upStreamRatio == null) {
                        // Upstream ratio not known (yet); a later pass picks it up if it appears.
                        continue;
                    }
                    Double currentRatio = subDagRatio.get(root);
                    if (currentRatio == null || currentRatio < upStreamRatio) {
                        subDagRatio.put(root, upStreamRatio);
                        checkAgain = true;
                    }
                }
            }
        }
        return subDagRatio;
    }
}