All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.optimizer.dag.SingleInputNode Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.flink.optimizer.dag;

import static org.apache.flink.optimizer.plan.PlanNode.SourceAndDamReport.FOUND_SOURCE;
import static org.apache.flink.optimizer.plan.PlanNode.SourceAndDamReport.FOUND_SOURCE_AND_DAM;
import static org.apache.flink.optimizer.plan.PlanNode.SourceAndDamReport.NOT_FOUND;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.api.common.operators.SemanticProperties;
import org.apache.flink.api.common.operators.SingleInputOperator;
import org.apache.flink.api.common.operators.util.FieldSet;
import org.apache.flink.optimizer.CompilerException;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.CostEstimator;
import org.apache.flink.optimizer.dataproperties.GlobalProperties;
import org.apache.flink.optimizer.dataproperties.InterestingProperties;
import org.apache.flink.optimizer.dataproperties.LocalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedLocalProperties;
import org.apache.flink.optimizer.operators.OperatorDescriptorSingle;
import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.NamedChannel;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plan.SingleInputPlanNode;
import org.apache.flink.optimizer.plan.PlanNode.SourceAndDamReport;
import org.apache.flink.optimizer.util.NoOpUnaryUdfOp;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.io.network.DataExchangeMode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import org.apache.flink.util.Visitor;

import com.google.common.collect.Sets;

/**
 * A node in the optimizer's program representation for an operation with a single input.
 * 
 * This class contains all the generic logic for handling branching flows, as well as to
 * enumerate candidate execution plans. The subclasses for specific operators simply add logic
 * for cost estimates and specify possible strategies for their execution.
 */
public abstract class SingleInputNode extends OptimizerNode {
	
	/**
	 * The key fields of this node. May be {@code null}, for example when the operator
	 * declares no key columns or when the node is created without keys.
	 */
	protected final FieldSet keys; 			// The set of key fields
	
	/**
	 * The connection through which this node receives its input. It is {@code null} until
	 * it is assigned via {@link #setIncomingConnection(DagConnection)}.
	 */
	protected DagConnection inConn; 		// the input of the node
	
	// --------------------------------------------------------------------------------------------
	
	/**
	 * Creates a new node with a single input for the optimizer plan.
	 * 
	 * <p>The key fields are read from the key columns that the operator declares for input
	 * {@code 0}. If the operator declares no key columns (the array is {@code null} or empty),
	 * {@link #keys} is set to {@code null}.
	 * 
	 * @param programOperator The operator that the node represents.
	 */
	protected SingleInputNode(SingleInputOperator<?, ?, ?> programOperator) {
		super(programOperator);
		
		final int[] keyColumns = programOperator.getKeyColumns(0);
		this.keys = (keyColumns == null || keyColumns.length == 0) ? null : new FieldSet(keyColumns);
	}
	
	/**
	 * Creates a node that is backed by the no-op unary operator and uses the given key fields.
	 * 
	 * @param keys The key fields of the node. May be {@code null}.
	 */
	protected SingleInputNode(FieldSet keys) {
		super(NoOpUnaryUdfOp.INSTANCE);
		this.keys = keys;
	}
	
	/**
	 * Creates a node without key fields that is backed by the no-op unary operator.
	 */
	protected SingleInputNode() {
		this((FieldSet) null);
	}
	
	/**
	 * Creates a node from an existing node. This class only takes over the key fields of the
	 * given node; the incoming connection is not assigned here.
	 * 
	 * @param toCopy The node whose state is handed to the super constructor and whose key
	 *               fields are reused.
	 */
	protected SingleInputNode(SingleInputNode toCopy) {
		super(toCopy);
		
		keys = toCopy.keys;
	}
	
	// --------------------------------------------------------------------------------------------

	/**
	 * Gets the operator represented by this node, narrowed to a single-input operator.
	 * 
	 * @return The operator of this node.
	 */
	@Override
	public SingleInputOperator<?, ?, ?> getOperator() {
		return (SingleInputOperator<?, ?, ?>) super.getOperator();
	}
	
	/**
	 * Returns the connection that feeds this node.
	 * 
	 * @return The incoming connection, or {@code null} if none has been assigned yet.
	 */
	public DagConnection getIncomingConnection() {
		return inConn;
	}

	/**
	 * Assigns the connection that feeds this node, replacing any previously assigned one.
	 * 
	 * @param inConn The connection through which this node receives its input.
	 */
	public void setIncomingConnection(DagConnection inConn) {
		this.inConn = inConn;
	}
	
	/**
	 * Returns the node at the source end of the incoming connection.
	 * 
	 * @return The predecessor of this node, or {@code null} if no incoming connection
	 *         has been assigned yet.
	 */
	public OptimizerNode getPredecessorNode() {
		return (this.inConn == null) ? null : this.inConn.getSource();
	}

	/**
	 * Gets the incoming connections of this node, which is always a list with exactly one
	 * element: the single input connection.
	 * 
	 * @return An immutable single-element list holding the incoming connection. The element
	 *         is {@code null} if no connection has been assigned yet.
	 */
	@Override
	public List<DagConnection> getIncomingConnections() {
		return Collections.singletonList(this.inConn);
	}
	

	/**
	 * Returns the semantic properties declared by the operator of this node.
	 * 
	 * @return The semantic properties of the underlying operator.
	 */
	@Override
	public SemanticProperties getSemanticProperties() {
		return this.getOperator().getSemanticProperties();
	}

	/**
	 * Returns the semantic properties that are used to filter the local properties of
	 * candidate plans. This implementation returns the node's regular semantic properties;
	 * subclasses may override it.
	 * 
	 * @return The semantic properties used for filtering local properties.
	 */
	protected SemanticProperties getSemanticPropertiesForLocalPropertyFiltering() {
		return getSemanticProperties();
	}

	/**
	 * Returns the semantic properties that are used to filter the global properties of
	 * candidate plans. This implementation returns the node's regular semantic properties;
	 * subclasses may override it.
	 * 
	 * @return The semantic properties used for filtering global properties.
	 */
	protected SemanticProperties getSemanticPropertiesForGlobalPropertyFiltering() {
		return getSemanticProperties();
	}

	@Override
	public void setInput(Map, OptimizerNode> contractToNode, ExecutionMode defaultExchangeMode)
			throws CompilerException
	{
		// see if an internal hint dictates the strategy to use
		final Configuration conf = getOperator().getParameters();
		final String shipStrategy = conf.getString(Optimizer.HINT_SHIP_STRATEGY, null);
		final ShipStrategyType preSet;
		
		if (shipStrategy != null) {
			if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH)) {
				preSet = ShipStrategyType.PARTITION_HASH;
			} else if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION_RANGE)) {
				preSet = ShipStrategyType.PARTITION_RANGE;
			} else if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_FORWARD)) {
				preSet = ShipStrategyType.FORWARD;
			} else if (shipStrategy.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION)) {
				preSet = ShipStrategyType.PARTITION_RANDOM;
			} else {
				throw new CompilerException("Unrecognized ship strategy hint: " + shipStrategy);
			}
		} else {
			preSet = null;
		}
		
		// get the predecessor node
		Operator children = ((SingleInputOperator) getOperator()).getInput();
		
		OptimizerNode pred;
		DagConnection conn;
		if (children == null) {
			throw new CompilerException("Error: Node for '" + getOperator().getName() + "' has no input.");
		} else {
			pred = contractToNode.get(children);
			conn = new DagConnection(pred, this, defaultExchangeMode);
			if (preSet != null) {
				conn.setShipStrategy(preSet);
			}
		}
		
		// create the connection and add it
		setIncomingConnection(conn);
		pred.addOutgoingConnection(conn);
	}
	
	// --------------------------------------------------------------------------------------------
	//                             Properties and Optimization
	// --------------------------------------------------------------------------------------------
	
	/**
	 * Gets the operator descriptors of this node. Each descriptor provides the global and
	 * local properties it can request from the input and is used to instantiate the
	 * candidate plan nodes.
	 * 
	 * @return The operator descriptors that are considered during plan enumeration.
	 */
	protected abstract List<OperatorDescriptorSingle> getPossibleProperties();
	
	/**
	 * Computes the interesting properties for the input connection of this node and for
	 * all of its broadcast connections.
	 * 
	 * <p>The input connection receives the inherited interesting properties (filtered by the
	 * code annotations of this node for input {@code 0}), extended by all global and local
	 * properties requested by the operator descriptors of this node. Every broadcast
	 * connection receives an empty set of interesting properties.
	 * 
	 * @param estimator The cost estimator (not used by this implementation).
	 */
	@Override
	public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
		// start with what we inherit and what is preserved by our user code
		final InterestingProperties inputProps = getInterestingProperties().filterByCodeAnnotations(this, 0);
		
		// add everything that the descriptors of this node may request
		for (OperatorDescriptorSingle descriptor : getPossibleProperties()) {
			
			for (RequestedGlobalProperties requested : descriptor.getPossibleGlobalProperties()) {
				if (requested.getPartitioning().isPartitionedOnKey()) {
					// make sure that among the same partitioning types, we do not push
					// anything down that has fewer key fields; at most one inherited
					// entry is dropped before the requested one is added
					for (RequestedGlobalProperties inherited : inputProps.getGlobalProperties()) {
						if (inherited.getPartitioning() != requested.getPartitioning()) {
							continue;
						}
						if (requested.getPartitionedFields().isValidSubset(inherited.getPartitionedFields())) {
							// the iteration stops right after the removal, so the set is
							// never touched again through this iterator
							inputProps.getGlobalProperties().remove(inherited);
							break;
						}
					}
				}
				
				inputProps.addGlobalProperties(requested);
			}
			
			for (RequestedLocalProperties requested : descriptor.getPossibleLocalProperties()) {
				inputProps.addLocalProperties(requested);
			}
		}
		this.inConn.setInterestingProperties(inputProps);
		
		// broadcast inputs get no interesting properties
		for (DagConnection broadcastConn : getBroadcastConnections()) {
			broadcastConn.setInterestingProperties(new InterestingProperties());
		}
	}
	

	@Override
	public List getAlternativePlans(CostEstimator estimator) {
		// check if we have a cached version
		if (this.cachedPlans != null) {
			return this.cachedPlans;
		}

		boolean childrenSkippedDueToReplicatedInput = false;

		// calculate alternative sub-plans for predecessor
		final List subPlans = getPredecessorNode().getAlternativePlans(estimator);
		final Set intGlobal = this.inConn.getInterestingProperties().getGlobalProperties();
		
		// calculate alternative sub-plans for broadcast inputs
		final List> broadcastPlanChannels = new ArrayList>();
		List broadcastConnections = getBroadcastConnections();
		List broadcastConnectionNames = getBroadcastConnectionNames();

		for (int i = 0; i < broadcastConnections.size(); i++ ) {
			DagConnection broadcastConnection = broadcastConnections.get(i);
			String broadcastConnectionName = broadcastConnectionNames.get(i);
			List broadcastPlanCandidates = broadcastConnection.getSource().getAlternativePlans(estimator);

			// wrap the plan candidates in named channels
			HashSet broadcastChannels = new HashSet(broadcastPlanCandidates.size());
			for (PlanNode plan: broadcastPlanCandidates) {
				NamedChannel c = new NamedChannel(broadcastConnectionName, plan);
				DataExchangeMode exMode = DataExchangeMode.select(broadcastConnection.getDataExchangeMode(),
										ShipStrategyType.BROADCAST, broadcastConnection.isBreakingPipeline());
				c.setShipStrategy(ShipStrategyType.BROADCAST, exMode);
				broadcastChannels.add(c);
			}
			broadcastPlanChannels.add(broadcastChannels);
		}

		final RequestedGlobalProperties[] allValidGlobals;
		{
			Set pairs = new HashSet();
			for (OperatorDescriptorSingle ods : getPossibleProperties()) {
				pairs.addAll(ods.getPossibleGlobalProperties());
			}
			allValidGlobals = pairs.toArray(new RequestedGlobalProperties[pairs.size()]);
		}
		final ArrayList outputPlans = new ArrayList();

		final ExecutionMode executionMode = this.inConn.getDataExchangeMode();

		final int parallelism = getParallelism();
		final int inParallelism = getPredecessorNode().getParallelism();

		final boolean parallelismChange = inParallelism != parallelism;

		final boolean breaksPipeline = this.inConn.isBreakingPipeline();

		// create all candidates
		for (PlanNode child : subPlans) {

			if (child.getGlobalProperties().isFullyReplicated()) {
				// fully replicated input is always locally forwarded if the parallelism is not changed
				if (parallelismChange) {
					// can not continue with this child
					childrenSkippedDueToReplicatedInput = true;
					continue;
				} else {
					this.inConn.setShipStrategy(ShipStrategyType.FORWARD);
				}
			}

			if (this.inConn.getShipStrategy() == null) {
				// pick the strategy ourselves
				for (RequestedGlobalProperties igps: intGlobal) {
					final Channel c = new Channel(child, this.inConn.getMaterializationMode());
					igps.parameterizeChannel(c, parallelismChange, executionMode, breaksPipeline);
					
					// if the parallelism changed, make sure that we cancel out properties, unless the
					// ship strategy preserves/establishes them even under changing parallelisms
					if (parallelismChange && !c.getShipStrategy().isNetworkStrategy()) {
						c.getGlobalProperties().reset();
					}
					
					// check whether we meet any of the accepted properties
					// we may remove this check, when we do a check to not inherit
					// requested global properties that are incompatible with all possible
					// requested properties
					for (RequestedGlobalProperties rgps: allValidGlobals) {
						if (rgps.isMetBy(c.getGlobalProperties())) {
							c.setRequiredGlobalProps(rgps);
							addLocalCandidates(c, broadcastPlanChannels, igps, outputPlans, estimator);
							break;
						}
					}
				}
			} else {
				// hint fixed the strategy
				final Channel c = new Channel(child, this.inConn.getMaterializationMode());
				final ShipStrategyType shipStrategy = this.inConn.getShipStrategy();
				final DataExchangeMode exMode = DataExchangeMode.select(executionMode, shipStrategy, breaksPipeline);

				if (this.keys != null) {
					c.setShipStrategy(shipStrategy, this.keys.toFieldList(), exMode);
				} else {
					c.setShipStrategy(shipStrategy, exMode);
				}
				
				if (parallelismChange) {
					c.adjustGlobalPropertiesForFullParallelismChange();
				}

				// check whether we meet any of the accepted properties
				for (RequestedGlobalProperties rgps: allValidGlobals) {
					if (rgps.isMetBy(c.getGlobalProperties())) {
						addLocalCandidates(c, broadcastPlanChannels, rgps, outputPlans, estimator);
						break;
					}
				}
			}
		}

		if(outputPlans.isEmpty()) {
			if(childrenSkippedDueToReplicatedInput) {
				throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Invalid use of replicated input.");
			} else {
				throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.");
			}
		}

		// cost and prune the plans
		for (PlanNode node : outputPlans) {
			estimator.costOperator(node);
		}
		prunePlanAlternatives(outputPlans);
		outputPlans.trimToSize();

		this.cachedPlans = outputPlans;
		return outputPlans;
	}
	
	protected void addLocalCandidates(Channel template, List> broadcastPlanChannels, RequestedGlobalProperties rgps,
			List target, CostEstimator estimator)
	{
		for (RequestedLocalProperties ilp : this.inConn.getInterestingProperties().getLocalProperties()) {
			final Channel in = template.clone();
			ilp.parameterizeChannel(in);
			
			// instantiate a candidate, if the instantiated local properties meet one possible local property set
			outer:
			for (OperatorDescriptorSingle dps: getPossibleProperties()) {
				for (RequestedLocalProperties ilps : dps.getPossibleLocalProperties()) {
					if (ilps.isMetBy(in.getLocalProperties())) {
						in.setRequiredLocalProps(ilps);
						instantiateCandidate(dps, in, broadcastPlanChannels, target, estimator, rgps, ilp);
						break outer;
					}
				}
			}
		}
	}

	protected void instantiateCandidate(OperatorDescriptorSingle dps, Channel in, List> broadcastPlanChannels,
			List target, CostEstimator estimator, RequestedGlobalProperties globPropsReq, RequestedLocalProperties locPropsReq)
	{
		final PlanNode inputSource = in.getSource();
		
		for (List broadcastChannelsCombination: Sets.cartesianProduct(broadcastPlanChannels)) {
			
			boolean validCombination = true;
			boolean requiresPipelinebreaker = false;
			
			// check whether the broadcast inputs use the same plan candidate at the branching point
			for (int i = 0; i < broadcastChannelsCombination.size(); i++) {
				NamedChannel nc = broadcastChannelsCombination.get(i);
				PlanNode bcSource = nc.getSource();
				
				// check branch compatibility against input
				if (!areBranchCompatible(bcSource, inputSource)) {
					validCombination = false;
					break;
				}
				
				// check branch compatibility against all other broadcast variables
				for (int k = 0; k < i; k++) {
					PlanNode otherBcSource = broadcastChannelsCombination.get(k).getSource();
					
					if (!areBranchCompatible(bcSource, otherBcSource)) {
						validCombination = false;
						break;
					}
				}
				
				// check if there is a common predecessor and whether there is a dam on the way to all common predecessors
				if (in.isOnDynamicPath() && this.hereJoinedBranches != null) {
					for (OptimizerNode brancher : this.hereJoinedBranches) {
						PlanNode candAtBrancher = in.getSource().getCandidateAtBranchPoint(brancher);
						
						if (candAtBrancher == null) {
							// closed branch between two broadcast variables
							continue;
						}
						
						SourceAndDamReport res = in.getSource().hasDamOnPathDownTo(candAtBrancher);
						if (res == NOT_FOUND) {
							throw new CompilerException("Bug: Tracing dams for deadlock detection is broken.");
						} else if (res == FOUND_SOURCE) {
							requiresPipelinebreaker = true;
							break;
						} else if (res == FOUND_SOURCE_AND_DAM) {
							// good
						} else {
							throw new CompilerException();
						}
					}
				}
			}
			
			if (!validCombination) {
				continue;
			}
			
			if (requiresPipelinebreaker) {
				in.setTempMode(in.getTempMode().makePipelineBreaker());
			}
			
			final SingleInputPlanNode node = dps.instantiate(in, this);
			node.setBroadcastInputs(broadcastChannelsCombination);
			
			// compute how the strategy affects the properties
			GlobalProperties gProps = in.getGlobalProperties().clone();
			LocalProperties lProps = in.getLocalProperties().clone();
			gProps = dps.computeGlobalProperties(gProps);
			lProps = dps.computeLocalProperties(lProps);

			// filter by the user code field copies
			gProps = gProps.filterBySemanticProperties(getSemanticPropertiesForGlobalPropertyFiltering(), 0);
			lProps = lProps.filterBySemanticProperties(getSemanticPropertiesForLocalPropertyFiltering(), 0);
			
			// apply
			node.initProperties(gProps, lProps);
			node.updatePropertiesWithUniqueSets(getUniqueFields());
			target.add(node);
		}
	}

	// --------------------------------------------------------------------------------------------
	//                                     Branch Handling
	// --------------------------------------------------------------------------------------------
	
	/**
	 * Computes the stack of open branches for this node, unless it has been computed before.
	 * 
	 * <p>The closed branching nodes of the predecessor are added to this node. The branches
	 * that the predecessor reports for the input connection are then combined with those of
	 * the broadcast inputs. A {@code null} or empty result is stored as an empty list.
	 */
	@Override
	public void computeUnclosedBranchStack() {
		// only compute the stack once
		if (this.openBranches == null) {
			addClosedBranches(getPredecessorNode().closedBranchingNodes);
			
			// NOTE(review): the element type of these lists is not visible in this file,
			// so the raw List type is kept here
			final List inherited = getPredecessorNode().getBranchesForParent(this.inConn);
			
			// handle the data flow branching for the broadcast inputs
			final List merged = computeUnclosedBranchStackForBroadcastInputs(inherited);
			
			if (merged == null || merged.isEmpty()) {
				this.openBranches = Collections.emptyList();
			} else {
				this.openBranches = merged;
			}
		}
	}
	
	// --------------------------------------------------------------------------------------------
	//                                     Miscellaneous
	// --------------------------------------------------------------------------------------------

	/**
	 * Lets the visitor traverse this node, its predecessor, and the sources of all
	 * broadcast connections.
	 * 
	 * <p>If the visitor's {@code preVisit} call for this node returns {@code false}, nothing
	 * else happens. Otherwise the predecessor is visited first, then the sources of the
	 * broadcast connections, and finally {@code postVisit} is called for this node.
	 * 
	 * @param visitor The visitor that traverses the nodes.
	 * 
	 * @throws CompilerException Thrown, if the visitor descends into this node, but the
	 *                           node has no predecessor.
	 */
	@Override
	public void accept(Visitor<OptimizerNode> visitor) {
		if (!visitor.preVisit(this)) {
			return;
		}
		
		final OptimizerNode predecessor = getPredecessorNode();
		if (predecessor == null) {
			// report which node is dangling rather than throwing an exception without a message
			throw new CompilerException("Error: Node for '" + getOperator().getName() + "' has no input.");
		}
		predecessor.accept(visitor);
		
		for (DagConnection connection : getBroadcastConnections()) {
			connection.getSource().accept(visitor);
		}
		visitor.postVisit(this);
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy