All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.optimizer.costs.CostEstimator Maven / Gradle / Ivy

There is a newer version: 1.20.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.optimizer.costs;

import org.apache.flink.optimizer.CompilerException;
import org.apache.flink.optimizer.dag.EstimateProvider;
import org.apache.flink.optimizer.dag.TempMode;
import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.PlanNode;

import java.util.Iterator;

/**
 * Abstract base class for a cost estimator. Defines cost estimation methods and implements the
 * basic work method that computes the cost of an operator by adding input shipping cost, input
 * local cost, and driver cost.
 */
public abstract class CostEstimator {

    public abstract void addRandomPartitioningCost(EstimateProvider estimates, Costs costs);

    public abstract void addHashPartitioningCost(EstimateProvider estimates, Costs costs);

    public abstract void addRangePartitionCost(EstimateProvider estimates, Costs costs);

    public abstract void addBroadcastCost(
            EstimateProvider estimates, int replicationFactor, Costs costs);

    // ------------------------------------------------------------------------

    public abstract void addFileInputCost(long fileSizeInBytes, Costs costs);

    public abstract void addLocalSortCost(EstimateProvider estimates, Costs costs);

    public abstract void addLocalMergeCost(
            EstimateProvider estimates1, EstimateProvider estimates2, Costs costs, int costWeight);

    public abstract void addHybridHashCosts(
            EstimateProvider buildSide, EstimateProvider probeSide, Costs costs, int costWeight);

    public abstract void addCachedHybridHashCosts(
            EstimateProvider buildSide, EstimateProvider probeSide, Costs costs, int costWeight);

    public abstract void addStreamedNestedLoopsCosts(
            EstimateProvider outerSide,
            EstimateProvider innerSide,
            long bufferSize,
            Costs costs,
            int costWeight);

    public abstract void addBlockNestedLoopsCosts(
            EstimateProvider outerSide,
            EstimateProvider innerSide,
            long blockSize,
            Costs costs,
            int costWeight);

    // ------------------------------------------------------------------------

    public abstract void addArtificialDamCost(
            EstimateProvider estimates, long bufferSize, Costs costs);

    // ------------------------------------------------------------------------

    /**
     * This method computes the cost of an operator. The cost is composed of cost for input
     * shipping, locally processing an input, and running the operator.
     *
     * 

It requires at least that all inputs are set and have a proper ship strategy set, which is * not equal to NONE. * * @param n The node to compute the costs for. */ public void costOperator(PlanNode n) { // initialize costs objects with no costs final Costs totalCosts = new Costs(); final long availableMemory = n.getGuaranteedAvailableMemory(); // add the shipping strategy costs for (Channel channel : n.getInputs()) { final Costs costs = new Costs(); // Plans that apply the same strategies, but at different points // are equally expensive. For example, if a partitioning can be // pushed below a Map function there is often no difference in plan // costs between the pushed down version and the version that partitions // after the Mapper. However, in those cases, we want the expensive // strategy to appear later in the plan, as data reduction often occurs // by large factors, while blowup is rare and typically by smaller fractions. // We achieve this by adding a penalty to small penalty to the FORWARD strategy, // weighted by the current plan depth (steps to the earliest data source). // that way, later FORWARDS are more expensive than earlier forwards. // Note that this only applies to the heuristic costs. 
switch (channel.getShipStrategy()) { case NONE: throw new CompilerException( "Cannot determine costs: Shipping strategy has not been set for an input."); case FORWARD: // costs.addHeuristicNetworkCost(channel.getMaxDepth()); break; case PARTITION_RANDOM: addRandomPartitioningCost(channel, costs); break; case PARTITION_HASH: case PARTITION_CUSTOM: addHashPartitioningCost(channel, costs); break; case PARTITION_RANGE: addRangePartitionCost(channel, costs); break; case BROADCAST: addBroadcastCost(channel, channel.getReplicationFactor(), costs); break; case PARTITION_FORCED_REBALANCE: addRandomPartitioningCost(channel, costs); break; default: throw new CompilerException( "Unknown shipping strategy for input: " + channel.getShipStrategy()); } switch (channel.getLocalStrategy()) { case NONE: break; case SORT: case COMBININGSORT: addLocalSortCost(channel, costs); break; default: throw new CompilerException( "Unsupported local strategy for input: " + channel.getLocalStrategy()); } if (channel.getTempMode() != null && channel.getTempMode() != TempMode.NONE) { addArtificialDamCost(channel, 0, costs); } // adjust with the cost weight factor if (channel.isOnDynamicPath()) { costs.multiplyWith(channel.getCostWeight()); } totalCosts.addCosts(costs); } Channel firstInput = null; Channel secondInput = null; Costs driverCosts = new Costs(); int costWeight = 1; // adjust with the cost weight factor if (n.isOnDynamicPath()) { costWeight = n.getCostWeight(); } // get the inputs, if we have some { Iterator channels = n.getInputs().iterator(); if (channels.hasNext()) { firstInput = channels.next(); } if (channels.hasNext()) { secondInput = channels.next(); } } // determine the local costs switch (n.getDriverStrategy()) { case NONE: case UNARY_NO_OP: case BINARY_NO_OP: case MAP: case MAP_PARTITION: case FLAT_MAP: case ALL_GROUP_REDUCE: case ALL_REDUCE: // this operations does not do any actual grouping, since every element is in the // same single group case CO_GROUP: case CO_GROUP_RAW: 
case SORTED_GROUP_REDUCE: case SORTED_REDUCE: // grouping or co-grouping over sorted streams for free case SORTED_GROUP_COMBINE: // partial grouping is always local and main memory resident. we should add a // relative cpu cost at some point // partial grouping is always local and main memory resident. we should add a // relative cpu cost at some point case ALL_GROUP_COMBINE: case UNION: // pipelined local union is for free break; case INNER_MERGE: case FULL_OUTER_MERGE: case LEFT_OUTER_MERGE: case RIGHT_OUTER_MERGE: addLocalMergeCost(firstInput, secondInput, driverCosts, costWeight); break; case HYBRIDHASH_BUILD_FIRST: case RIGHT_HYBRIDHASH_BUILD_FIRST: case LEFT_HYBRIDHASH_BUILD_FIRST: case FULL_OUTER_HYBRIDHASH_BUILD_FIRST: addHybridHashCosts(firstInput, secondInput, driverCosts, costWeight); break; case HYBRIDHASH_BUILD_SECOND: case LEFT_HYBRIDHASH_BUILD_SECOND: case RIGHT_HYBRIDHASH_BUILD_SECOND: case FULL_OUTER_HYBRIDHASH_BUILD_SECOND: addHybridHashCosts(secondInput, firstInput, driverCosts, costWeight); break; case HYBRIDHASH_BUILD_FIRST_CACHED: addCachedHybridHashCosts(firstInput, secondInput, driverCosts, costWeight); break; case HYBRIDHASH_BUILD_SECOND_CACHED: addCachedHybridHashCosts(secondInput, firstInput, driverCosts, costWeight); break; case NESTEDLOOP_BLOCKED_OUTER_FIRST: addBlockNestedLoopsCosts( firstInput, secondInput, availableMemory, driverCosts, costWeight); break; case NESTEDLOOP_BLOCKED_OUTER_SECOND: addBlockNestedLoopsCosts( secondInput, firstInput, availableMemory, driverCosts, costWeight); break; case NESTEDLOOP_STREAMED_OUTER_FIRST: addStreamedNestedLoopsCosts( firstInput, secondInput, availableMemory, driverCosts, costWeight); break; case NESTEDLOOP_STREAMED_OUTER_SECOND: addStreamedNestedLoopsCosts( secondInput, firstInput, availableMemory, driverCosts, costWeight); break; default: throw new CompilerException( "Unknown local strategy: " + n.getDriverStrategy().name()); } totalCosts.addCosts(driverCosts); n.setCosts(totalCosts); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy