org.apache.flink.runtime.healthmanager.plugins.resolvers.ParallelismResolverUtils Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of flink-runtime_2.11 Show documentation
There is a newer version: 1.5.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.healthmanager.plugins.resolvers;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.healthmanager.plugins.utils.HealthMonitorOptions;
import org.apache.flink.runtime.healthmanager.plugins.utils.JobTopologyAnalyzer;
import org.apache.flink.runtime.healthmanager.plugins.utils.TaskMetrics;
import org.apache.flink.runtime.jobgraph.JobVertexID;

import java.util.HashMap;
import java.util.Map;

/**
 * Utils to estimate current workload.
 *
 * <p>The estimated workload of a vertex is its measured workload multiplied by the scale ratio
 * of the sub dag it belongs to. Ratios are first computed for source sub dags and then
 * propagated to downstream sub dags.
 *
 * <p>When scaling by max workload is enabled, an instance remembers the largest workload ever
 * estimated for each vertex. This state is mutable and accessed without any synchronization.
 */
public class ParallelismResolverUtils {

	private final JobTopologyAnalyzer jobTopologyAnalyzer;

	/** Whether estimates are floored by the largest workload previously estimated per vertex. */
	private final boolean scaleByMaxWorkload;

	/** Largest workload estimated so far per vertex; only updated when {@link #scaleByMaxWorkload} is set. */
	private final Map<JobVertexID, Double> vertexMaxWorkload = new HashMap<>();

	/**
	 * Creates the utils for the given topology.
	 *
	 * @param jobTopologyAnalyzer analyzer used to look up sub dag roots, sources and inputs
	 * @param config configuration from which
	 *     {@link HealthMonitorOptions#PARALLELISM_SCALE_BY_MAX_WORKLOAD} is read
	 */
	public ParallelismResolverUtils(JobTopologyAnalyzer jobTopologyAnalyzer, Configuration config) {
		this.jobTopologyAnalyzer = jobTopologyAnalyzer;
		this.scaleByMaxWorkload = config.getBoolean(HealthMonitorOptions.PARALLELISM_SCALE_BY_MAX_WORKLOAD);
	}

	/**
	 * Estimates the workload of every vertex present in {@code taskMetrics}.
	 *
	 * <p>Each vertex's measured workload is multiplied by the ratio of its sub dag root as
	 * returned by {@link #estimateSubDagWorkloadRatio(Map)}. If scaling by max workload is
	 * enabled, an estimate lower than the recorded maximum of that vertex is replaced by the
	 * maximum; otherwise the estimate becomes the new recorded maximum.
	 *
	 * @param taskMetrics metrics per vertex
	 * @return estimated workload per vertex, with the same key set as {@code taskMetrics}
	 */
	public Map<JobVertexID, Double> estimateVertexWorkload(Map<JobVertexID, TaskMetrics> taskMetrics) {

		Map<JobVertexID, Double> result = new HashMap<>();
		Map<JobVertexID, Double> subDagRatio = estimateSubDagWorkloadRatio(taskMetrics);
		for (Map.Entry<JobVertexID, TaskMetrics> entry : taskMetrics.entrySet()) {
			JobVertexID vertexID = entry.getKey();
			// NOTE(review): throws NullPointerException if the sub dag root has no ratio;
			// presumably callers always pass metrics for every source - confirm.
			result.put(
					vertexID,
					entry.getValue().getWorkload() * subDagRatio.get(jobTopologyAnalyzer.getSubDagRoot(vertexID)));
		}

		if (scaleByMaxWorkload) {
			// Only values of existing keys are replaced, so iterating the entry set is safe.
			for (Map.Entry<JobVertexID, Double> entry : result.entrySet()) {
				JobVertexID vertexID = entry.getKey();
				double historicalMax = vertexMaxWorkload.getOrDefault(vertexID, 0.0);
				// Kept as an explicit comparison (not Math.max) so that NaN handling is unchanged.
				if (entry.getValue() < historicalMax) {
					entry.setValue(historicalMax);
				} else {
					vertexMaxWorkload.put(vertexID, entry.getValue());
				}
			}
		}
		return result;
	}

	/**
	 * Estimates the workload scale ratio of every sub dag.
	 *
	 * <p>For a source sub dag root the ratio starts at 1 and becomes
	 * {@code 1 / (1 - delayIncreasingRate)} when the delay increasing rate lies strictly between
	 * 0 and 1. For a parallel source with positive workload the ratio is additionally capped by
	 * {@code maxTps / inputTps}, where {@code maxTps} is derived from the partition latency, the
	 * per-record task latency excluding output wait, and the partition count.
	 *
	 * <p>Every other sub dag root receives the largest ratio among the sub dags of its inputs.
	 *
	 * @param taskMetrics metrics per vertex; an entry is looked up for every source sub dag root
	 * @return scale ratio per sub dag root
	 */
	public Map<JobVertexID, Double> estimateSubDagWorkloadRatio(
			Map<JobVertexID, TaskMetrics> taskMetrics) {

		Map<JobVertexID, Double> subDagRatio = new HashMap<>();

		// estimate all source sub dag workload
		for (JobVertexID vertexId : jobTopologyAnalyzer.getAllSubDagRoots()) {
			if (!jobTopologyAnalyzer.isSource(vertexId)) {
				continue;
			}

			TaskMetrics metric = taskMetrics.get(vertexId);

			double ratio = 1;
			if (metric.getDelayIncreasingRate() > 0 && metric.getDelayIncreasingRate() < 1) {
				ratio = 1 / (1 - metric.getDelayIncreasingRate());
			}

			if (metric.isParallelSource() && metric.getWorkload() > 0) {
				double maxTps = 1.0 / Math.max(
						metric.getPartitionLatency(),
						metric.getTaskLatencyPerRecord() - metric.getWaitOutputPerRecord()) * metric.getPartitionCount();

				// The source cannot be scaled beyond what its partitions can deliver.
				double maxRatio = maxTps / metric.getInputTps();
				if (maxRatio < ratio) {
					ratio = maxRatio;
				}
			}
			subDagRatio.put(vertexId, ratio);
		}

		// scale up downstream sub dags according to upstream sub dags;
		// repeat until a full pass changes nothing, since roots are visited in no particular order
		boolean checkAgain = true;
		while (checkAgain) {
			checkAgain = false;
			for (JobVertexID root : jobTopologyAnalyzer.getAllSubDagRoots()) {
				for (JobVertexID upStream : jobTopologyAnalyzer.getInputs(root)) {
					JobVertexID upStreamSubDagRoot = jobTopologyAnalyzer.getSubDagRoot(upStream);

					// upstream ratio not known yet, a later pass will pick it up
					Double upStreamRatio = subDagRatio.get(upStreamSubDagRoot);
					if (upStreamRatio == null) {
						continue;
					}

					Double currentRatio = subDagRatio.get(root);
					if (currentRatio == null || currentRatio < upStreamRatio) {
						subDagRatio.put(root, upStreamRatio);
						checkAgain = true;
					}
				}
			}
		}
		return subDagRatio;
	}

}