All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.compiler.dag.WorksetIterationNode Maven / Gradle / Ivy

The newest version!
/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.compiler.dag;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import eu.stratosphere.api.common.operators.base.DeltaIterationBase;
import eu.stratosphere.api.common.operators.util.FieldList;
import eu.stratosphere.compiler.CompilerException;
import eu.stratosphere.compiler.DataStatistics;
import eu.stratosphere.compiler.PactCompiler.InterestingPropertyVisitor;
import eu.stratosphere.compiler.costs.CostEstimator;
import eu.stratosphere.compiler.dataproperties.GlobalProperties;
import eu.stratosphere.compiler.dataproperties.InterestingProperties;
import eu.stratosphere.compiler.dataproperties.LocalProperties;
import eu.stratosphere.compiler.dataproperties.PartitioningProperty;
import eu.stratosphere.compiler.dataproperties.RequestedGlobalProperties;
import eu.stratosphere.compiler.dataproperties.RequestedLocalProperties;
import eu.stratosphere.compiler.operators.OperatorDescriptorDual;
import eu.stratosphere.compiler.operators.SolutionSetDeltaOperator;
import eu.stratosphere.compiler.plan.Channel;
import eu.stratosphere.compiler.plan.DualInputPlanNode;
import eu.stratosphere.compiler.plan.NamedChannel;
import eu.stratosphere.compiler.plan.PlanNode;
import eu.stratosphere.compiler.plan.SingleInputPlanNode;
import eu.stratosphere.compiler.plan.SolutionSetPlanNode;
import eu.stratosphere.compiler.plan.WorksetIterationPlanNode;
import eu.stratosphere.compiler.plan.WorksetPlanNode;
import eu.stratosphere.compiler.util.NoOpBinaryUdfOp;
import eu.stratosphere.pact.runtime.shipping.ShipStrategyType;
import eu.stratosphere.pact.runtime.task.DriverStrategy;
import eu.stratosphere.pact.runtime.task.util.LocalStrategy;
import eu.stratosphere.types.Nothing;
import eu.stratosphere.types.NothingTypeInfo;
import eu.stratosphere.util.Visitor;

/**
 * A node in the optimizer's program representation for a workset iteration.
 */
public class WorksetIterationNode extends TwoInputNode implements IterationNode {
	
	private static final int DEFAULT_COST_WEIGHT = 20;
	
	
	private final FieldList solutionSetKeyFields;
	
	private final GlobalProperties partitionedProperties;
	
	private SolutionSetNode solutionSetNode;
	
	private WorksetNode worksetNode;
	
	private OptimizerNode solutionSetDelta;
	
	private OptimizerNode nextWorkset;
	
	private PactConnection solutionSetDeltaRootConnection;
	
	private PactConnection nextWorksetRootConnection;
	
	private SingleRootJoiner singleRoot;
	
	private boolean solutionDeltaImmediatelyAfterSolutionJoin;
	
	private final int costWeight;

	// --------------------------------------------------------------------------------------------
	
	/**
	 * Creates a new node with a single input for the optimizer plan.
	 * 
	 * @param iteration The iteration operator that the node represents.
	 */
	public WorksetIterationNode(DeltaIterationBase iteration) {
		super(iteration);
		
		final int[] ssKeys = iteration.getSolutionSetKeyFields();
		if (ssKeys == null || ssKeys.length == 0) {
			throw new CompilerException("Invalid WorksetIteration: No key fields defined for the solution set.");
		}
		this.solutionSetKeyFields = new FieldList(ssKeys);
		this.partitionedProperties = new GlobalProperties();
		this.partitionedProperties.setHashPartitioned(this.solutionSetKeyFields);
		
		int weight = iteration.getMaximumNumberOfIterations() > 0 ? 
			iteration.getMaximumNumberOfIterations() : DEFAULT_COST_WEIGHT;
			
		if (weight > OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT) {
			weight = OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT;
		}
		this.costWeight = weight; 
		
		this.possibleProperties.add(new WorksetOpDescriptor(this.solutionSetKeyFields));
	}

	// --------------------------------------------------------------------------------------------
	
	public DeltaIterationBase getIterationContract() {
		return (DeltaIterationBase) getPactContract();
	}
	
	public SolutionSetNode getSolutionSetNode() {
		return this.solutionSetNode;
	}
	
	public WorksetNode getWorksetNode() {
		return this.worksetNode;
	}
	
	public OptimizerNode getNextWorkset() {
		return this.nextWorkset;
	}
	
	public OptimizerNode getSolutionSetDelta() {
		return this.solutionSetDelta;
	}

	public void setPartialSolution(SolutionSetNode solutionSetNode, WorksetNode worksetNode) {
		if (this.solutionSetNode != null || this.worksetNode != null) {
			throw new IllegalStateException("Error: Initializing WorksetIterationNode multiple times.");
		}
		this.solutionSetNode = solutionSetNode;
		this.worksetNode = worksetNode;
	}
	
	public void setNextPartialSolution(OptimizerNode solutionSetDelta, OptimizerNode nextWorkset) {
		// check whether the next partial solution is itself the join with
		// the partial solution (so we can potentially do direct updates)
		if (solutionSetDelta instanceof TwoInputNode) {
			TwoInputNode solutionDeltaTwoInput = (TwoInputNode) solutionSetDelta;
			if (solutionDeltaTwoInput.getFirstPredecessorNode() == this.solutionSetNode ||
				solutionDeltaTwoInput.getSecondPredecessorNode() == this.solutionSetNode)
			{
				this.solutionDeltaImmediatelyAfterSolutionJoin = true;
			}
		}
		
		// attach an extra node to the solution set delta for the cases where we need to repartition
		UnaryOperatorNode solutionSetDeltaUpdateAux = new UnaryOperatorNode("Solution-Set Delta", getSolutionSetKeyFields(),
				new SolutionSetDeltaOperator(getSolutionSetKeyFields()));
		solutionSetDeltaUpdateAux.setDegreeOfParallelism(getDegreeOfParallelism());
		solutionSetDeltaUpdateAux.setSubtasksPerInstance(getSubtasksPerInstance());
		
		PactConnection conn = new PactConnection(solutionSetDelta, solutionSetDeltaUpdateAux);
		solutionSetDeltaUpdateAux.setIncomingConnection(conn);
		solutionSetDelta.addOutgoingConnection(conn);
		
		this.solutionSetDelta = solutionSetDeltaUpdateAux;
		this.nextWorkset = nextWorkset;
		
		this.singleRoot = new SingleRootJoiner();
		this.solutionSetDeltaRootConnection = new PactConnection(solutionSetDeltaUpdateAux, this.singleRoot);
		this.nextWorksetRootConnection = new PactConnection(nextWorkset, this.singleRoot);
		this.singleRoot.setInputs(this.solutionSetDeltaRootConnection, this.nextWorksetRootConnection);
		
		solutionSetDeltaUpdateAux.addOutgoingConnection(this.solutionSetDeltaRootConnection);
		nextWorkset.addOutgoingConnection(this.nextWorksetRootConnection);
	}
	
	public int getCostWeight() {
		return this.costWeight;
	}
	
	public TwoInputNode getSingleRootOfStepFunction() {
		return this.singleRoot;
	}
	
	public FieldList getSolutionSetKeyFields() {
		return this.solutionSetKeyFields;
	}
	
	public OptimizerNode getInitialSolutionSetPredecessorNode() {
		return getFirstPredecessorNode();
	}
	
	public OptimizerNode getInitialWorksetPredecessorNode() {
		return getSecondPredecessorNode();
	}

	// --------------------------------------------------------------------------------------------
	
	@Override
	public String getName() {
		return "Workset Iteration";
	}
	
	@Override
	public boolean isFieldConstant(int input, int fieldNumber) {
		return false;
	}
	
	protected void readStubAnnotations() {}
	
	@Override
	protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
		this.estimatedOutputSize = getFirstPredecessorNode().getEstimatedOutputSize();
		this.estimatedNumRecords = getFirstPredecessorNode().getEstimatedNumRecords();
	}
	
	// --------------------------------------------------------------------------------------------
	//                             Properties and Optimization
	// --------------------------------------------------------------------------------------------
	
	@Override
	public boolean isMemoryConsumer() {
		return true;
	}
	
	@Override
	protected List getPossibleProperties() {
		return new ArrayList(1);
	}
	
	@Override
	public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
		// our own solution (the solution set) is always partitioned and this cannot be adjusted
		// depending on what the successor to the workset iteration requests. for that reason,
		// we ignore incoming interesting properties.
		
		// in addition, we need to make 2 interesting property passes, because the root of the step function 
		// that computes the next workset needs the interesting properties as generated by the
		// workset source of the step function. the second pass concerns only the workset path.
		// as initial interesting properties, we have the trivial ones for the step function,
		// and partitioned on the solution set key for the solution set delta 
		
		RequestedGlobalProperties partitionedProperties = new RequestedGlobalProperties();
		partitionedProperties.setHashPartitioned(this.solutionSetKeyFields);
		InterestingProperties partitionedIP = new InterestingProperties();
		partitionedIP.addGlobalProperties(partitionedProperties);
		partitionedIP.addLocalProperties(new RequestedLocalProperties());
		
		this.nextWorksetRootConnection.setInterestingProperties(new InterestingProperties());
		this.solutionSetDeltaRootConnection.setInterestingProperties(partitionedIP.clone());
		
		InterestingPropertyVisitor ipv = new InterestingPropertyVisitor(estimator);
		this.nextWorkset.accept(ipv);
		this.solutionSetDelta.accept(ipv);
		
		// take the interesting properties of the partial solution and add them to the root interesting properties
		InterestingProperties worksetIntProps = this.worksetNode.getInterestingProperties();
		InterestingProperties intProps = new InterestingProperties();
		intProps.getGlobalProperties().addAll(worksetIntProps.getGlobalProperties());
		intProps.getLocalProperties().addAll(worksetIntProps.getLocalProperties());
		
		// clear all interesting properties to prepare the second traversal
		this.nextWorksetRootConnection.clearInterestingProperties();
		this.nextWorkset.accept(InterestingPropertiesClearer.INSTANCE);
		
		// 2nd pass
		this.nextWorksetRootConnection.setInterestingProperties(intProps);
		this.nextWorkset.accept(ipv);
		
		// now add the interesting properties of the workset to the workset input
		final InterestingProperties inProps = this.worksetNode.getInterestingProperties().clone();
		inProps.addGlobalProperties(new RequestedGlobalProperties());
		inProps.addLocalProperties(new RequestedLocalProperties());
		this.input2.setInterestingProperties(inProps);
		
		// the partial solution must be hash partitioned, so it has only that as interesting properties
		this.input1.setInterestingProperties(partitionedIP);
	}
	
	
	@Override
	protected void instantiate(OperatorDescriptorDual operator, Channel solutionSetIn, Channel worksetIn,
			List> broadcastPlanChannels, List target, CostEstimator estimator,
			RequestedGlobalProperties globPropsReqSolutionSet,RequestedGlobalProperties globPropsReqWorkset,
			RequestedLocalProperties locPropsReqSolutionSet, RequestedLocalProperties locPropsReqWorkset)
	{
		// check for pipeline breaking using hash join with build on the solution set side
		placePipelineBreakersIfNecessary(DriverStrategy.HYBRIDHASH_BUILD_FIRST, solutionSetIn, worksetIn);
		
		// NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
		// Whenever we instantiate the iteration, we enumerate new candidates for the step function.
		// That way, we make sure we have an appropriate plan for each candidate for the initial partial solution,
		// we have a fitting candidate for the step function (often, work is pushed out of the step function).
		// Among the candidates of the step function, we keep only those that meet the requested properties of the
		// current candidate initial partial solution. That makes sure these properties exist at the beginning of
		// every iteration.
		
		// 1) Because we enumerate multiple times, we may need to clean the cached plans
		//    before starting another enumeration
		this.nextWorkset.accept(PlanCacheCleaner.INSTANCE);
		this.solutionSetDelta.accept(PlanCacheCleaner.INSTANCE);
		
		// 2) Give the partial solution the properties of the current candidate for the initial partial solution
		//    This concerns currently only the workset.
		this.worksetNode.setCandidateProperties(worksetIn.getGlobalProperties(), worksetIn.getLocalProperties(), worksetIn);
		this.solutionSetNode.setCandidateProperties(this.partitionedProperties, new LocalProperties(), solutionSetIn);
		
		final SolutionSetPlanNode sspn = this.solutionSetNode.getCurrentSolutionSetPlanNode();
		final WorksetPlanNode wspn = this.worksetNode.getCurrentWorksetPlanNode();
		
		// 3) Get the alternative plans
		List solutionSetDeltaCandidates = this.solutionSetDelta.getAlternativePlans(estimator);
		List worksetCandidates = this.nextWorkset.getAlternativePlans(estimator);
		
		// 4) Throw away all that are not compatible with the properties currently requested to the
		//    initial partial solution
		
		// Make sure that the workset candidates fulfill the input requirements
		for (Iterator planDeleter = worksetCandidates.iterator(); planDeleter.hasNext(); ) {
			PlanNode candidate = planDeleter.next();
			if (!(globPropsReqWorkset.isMetBy(candidate.getGlobalProperties()) && locPropsReqWorkset.isMetBy(candidate.getLocalProperties()))) {
				planDeleter.remove();
			}
		}
		if (worksetCandidates.isEmpty()) {
			return;
		}
		
		// sanity check the solution set delta and cancel out the delta node, if it is not needed
		for (Iterator deltaPlans = solutionSetDeltaCandidates.iterator(); deltaPlans.hasNext(); ) {
			SingleInputPlanNode candidate = (SingleInputPlanNode) deltaPlans.next();
			GlobalProperties gp = candidate.getGlobalProperties();
			
			if (gp.getPartitioning() != PartitioningProperty.HASH_PARTITIONED || gp.getPartitioningFields() == null ||
					!gp.getPartitioningFields().equals(this.solutionSetKeyFields))
			{
				throw new CompilerException("Bug: The solution set delta is not partitioned.");
			}
		}
		
		// 5) Create a candidate for the Iteration Node for every remaining plan of the step function.
		
		final GlobalProperties gp = new GlobalProperties();
		gp.setHashPartitioned(this.solutionSetKeyFields);
		gp.addUniqueFieldCombination(this.solutionSetKeyFields);
		
		final LocalProperties lp = new LocalProperties();
		lp.addUniqueFields(this.solutionSetKeyFields);
		
		// take all combinations of solution set delta and workset plans
		for (PlanNode solutionSetCandidate : solutionSetDeltaCandidates) {
			for (PlanNode worksetCandidate : worksetCandidates) {
				// check whether they have the same operator at their latest branching point
				if (this.singleRoot.areBranchCompatible(solutionSetCandidate, worksetCandidate)) {
					
					SingleInputPlanNode siSolutionDeltaCandidate = (SingleInputPlanNode) solutionSetCandidate;
					boolean immediateDeltaUpdate;
					
					// check whether we need a dedicated solution set delta operator, or whether we can update on the fly
					if (siSolutionDeltaCandidate.getInput().getShipStrategy() == ShipStrategyType.FORWARD && this.solutionDeltaImmediatelyAfterSolutionJoin) {
						// we do not need this extra node. we can make the predecessor the delta
						// sanity check the node and connection
						if (siSolutionDeltaCandidate.getDriverStrategy() != DriverStrategy.UNARY_NO_OP || siSolutionDeltaCandidate.getInput().getLocalStrategy() != LocalStrategy.NONE) {
							throw new CompilerException("Invalid Solution set delta node.");
						}
						
						solutionSetCandidate = siSolutionDeltaCandidate.getInput().getSource();
						immediateDeltaUpdate = true;
					} else {
						// was not partitioned, we need to keep this node.
						// mark that we materialize the input
						siSolutionDeltaCandidate.getInput().setTempMode(TempMode.PIPELINE_BREAKER);
						immediateDeltaUpdate = false;
					}
					
					WorksetIterationPlanNode wsNode = new WorksetIterationPlanNode(
						this, "WorksetIteration ("+this.getPactContract().getName()+")", solutionSetIn, worksetIn, sspn, wspn, worksetCandidate, solutionSetCandidate);
					wsNode.setImmediateSolutionSetUpdate(immediateDeltaUpdate);
					wsNode.initProperties(gp, lp);
					target.add(wsNode);
				}
			}
		}
	}

	@Override
	public void computeUnclosedBranchStack() {
		if (this.openBranches != null) {
			return;
		}

		
		// IMPORTANT: First compute closed branches from the two inputs
		// we need to do this because the runtime iteration head effectively joins
		addClosedBranches(getFirstPredecessorNode().closedBranchingNodes);
		addClosedBranches(getSecondPredecessorNode().closedBranchingNodes);

		List result1 = getFirstPredecessorNode().getBranchesForParent(getFirstIncomingConnection());
		List result2 = getSecondPredecessorNode().getBranchesForParent(getSecondIncomingConnection());

		ArrayList inputsMerged1 = new ArrayList();
		mergeLists(result1, result2, inputsMerged1);
		
		addClosedBranches(getSingleRootOfStepFunction().closedBranchingNodes);

		ArrayList inputsMerged2 = new ArrayList();
		List result3 = getSingleRootOfStepFunction().openBranches;
		mergeLists(inputsMerged1, result3, inputsMerged2);

		// handle the data flow branching for the broadcast inputs
		List result = computeUnclosedBranchStackForBroadcastInputs(inputsMerged2);

		this.openBranches = (result == null || result.isEmpty()) ? Collections.emptyList() : result;
	}
	
	// --------------------------------------------------------------------------------------------
	//                      Iteration Specific Traversals
	// --------------------------------------------------------------------------------------------

	public void acceptForStepFunction(Visitor visitor) {
		this.singleRoot.accept(visitor);
	}
	
	// --------------------------------------------------------------------------------------------
	//                             Utility Classes
	// --------------------------------------------------------------------------------------------
	
	private static final class WorksetOpDescriptor extends OperatorDescriptorDual {
		
		private WorksetOpDescriptor(FieldList solutionSetKeys) {
			super(solutionSetKeys, null);
		}

		@Override
		public DriverStrategy getStrategy() {
			return DriverStrategy.NONE;
		}

		@Override
		protected List createPossibleGlobalProperties() {
			RequestedGlobalProperties partitionedGp = new RequestedGlobalProperties();
			partitionedGp.setHashPartitioned(this.keys1);
			return Collections.singletonList(new GlobalPropertiesPair(partitionedGp, new RequestedGlobalProperties()));
		}

		@Override
		protected List createPossibleLocalProperties() {
			// all properties are possible
			return Collections.singletonList(new LocalPropertiesPair(
				new RequestedLocalProperties(), new RequestedLocalProperties()));
		}
		
		@Override
		public boolean areCoFulfilled(RequestedLocalProperties requested1, RequestedLocalProperties requested2,
				LocalProperties produced1, LocalProperties produced2) {
			return true;
		}

		@Override
		public DualInputPlanNode instantiate(Channel in1, Channel in2, TwoInputNode node) {
			throw new UnsupportedOperationException();
		}

		@Override
		public GlobalProperties computeGlobalProperties(GlobalProperties in1, GlobalProperties in2) {
			throw new UnsupportedOperationException();
		}

		@Override
		public LocalProperties computeLocalProperties(LocalProperties in1, LocalProperties in2) {
			throw new UnsupportedOperationException();
		}
	}
	
	public static class SingleRootJoiner extends TwoInputNode {
		
		SingleRootJoiner() {
			super(new NoOpBinaryUdfOp(new NothingTypeInfo()));
			
			setDegreeOfParallelism(1);
			setSubtasksPerInstance(1);
		}
		
		public void setInputs(PactConnection input1, PactConnection input2) {
			this.input1 = input1;
			this.input2 = input2;
		}
		
		@Override
		public String getName() {
			return "Internal Utility Node";
		}

		@Override
		protected List getPossibleProperties() {
			return Collections.emptyList();
		}

		@Override
		protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
			// no estimates are needed here
		}

	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy