// All downloads are free. Search and download functionality uses the official Maven repository.

// org.apache.flink.optimizer.dag.BulkIterationNode (Maven / Gradle / Ivy)

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.optimizer.dag;

import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.api.common.operators.SemanticProperties;
import org.apache.flink.api.common.operators.SemanticProperties.EmptySemanticProperties;
import org.apache.flink.api.common.operators.base.BulkIterationBase;
import org.apache.flink.optimizer.CompilerException;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.costs.CostEstimator;
import org.apache.flink.optimizer.dag.WorksetIterationNode.SingleRootJoiner;
import org.apache.flink.optimizer.dataproperties.GlobalProperties;
import org.apache.flink.optimizer.dataproperties.InterestingProperties;
import org.apache.flink.optimizer.dataproperties.LocalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedLocalProperties;
import org.apache.flink.optimizer.operators.NoOpDescriptor;
import org.apache.flink.optimizer.operators.OperatorDescriptorSingle;
import org.apache.flink.optimizer.plan.BulkIterationPlanNode;
import org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode;
import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.NamedChannel;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plan.PlanNode.FeedbackPropertiesMeetRequirementsReport;
import org.apache.flink.optimizer.plan.SingleInputPlanNode;
import org.apache.flink.optimizer.traversals.InterestingPropertyVisitor;
import org.apache.flink.optimizer.util.NoOpUnaryUdfOp;
import org.apache.flink.runtime.operators.DriverStrategy;
import org.apache.flink.util.Visitor;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

/** A node in the optimizer's program representation for a bulk iteration. */
public class BulkIterationNode extends SingleInputNode implements IterationNode {

    private BulkPartialSolutionNode partialSolution;

    private OptimizerNode terminationCriterion;

    private OptimizerNode nextPartialSolution;

    private DagConnection rootConnection; // connection out of the next partial solution

    private DagConnection
            terminationCriterionRootConnection; // connection out of the term. criterion

    private OptimizerNode singleRoot;

    private final int costWeight;

    // --------------------------------------------------------------------------------------------

    /**
     * Creates a new node for the bulk iteration.
     *
     * @param iteration The bulk iteration the node represents.
     */
    public BulkIterationNode(BulkIterationBase iteration) {
        super(iteration);

        if (iteration.getMaximumNumberOfIterations() <= 0) {
            throw new CompilerException(
                    "BulkIteration must have a maximum number of iterations specified.");
        }

        int numIters = iteration.getMaximumNumberOfIterations();

        this.costWeight =
                (numIters > 0 && numIters < OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT)
                        ? numIters
                        : OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT;
    }

    // --------------------------------------------------------------------------------------------

    public BulkIterationBase getIterationContract() {
        return (BulkIterationBase) getOperator();
    }

    /**
     * Gets the partialSolution from this BulkIterationNode.
     *
     * @return The partialSolution.
     */
    public BulkPartialSolutionNode getPartialSolution() {
        return partialSolution;
    }

    /**
     * Sets the partialSolution for this BulkIterationNode.
     *
     * @param partialSolution The partialSolution to set.
     */
    public void setPartialSolution(BulkPartialSolutionNode partialSolution) {
        this.partialSolution = partialSolution;
    }

    /**
     * Gets the nextPartialSolution from this BulkIterationNode.
     *
     * @return The nextPartialSolution.
     */
    public OptimizerNode getNextPartialSolution() {
        return nextPartialSolution;
    }

    /**
     * Sets the nextPartialSolution for this BulkIterationNode.
     *
     * @param nextPartialSolution The nextPartialSolution to set.
     */
    public void setNextPartialSolution(
            OptimizerNode nextPartialSolution, OptimizerNode terminationCriterion) {

        // check if the root of the step function has the same parallelism as the iteration
        // or if the step function has any operator at all
        if (nextPartialSolution.getParallelism() != getParallelism()
                || nextPartialSolution == partialSolution
                || nextPartialSolution instanceof BinaryUnionNode) {
            // add a no-op to the root to express the re-partitioning
            NoOpNode noop = new NoOpNode();
            noop.setParallelism(getParallelism());

            DagConnection noOpConn =
                    new DagConnection(nextPartialSolution, noop, ExecutionMode.PIPELINED);
            noop.setIncomingConnection(noOpConn);
            nextPartialSolution.addOutgoingConnection(noOpConn);

            nextPartialSolution = noop;
        }

        this.nextPartialSolution = nextPartialSolution;
        this.terminationCriterion = terminationCriterion;

        if (terminationCriterion == null) {
            this.singleRoot = nextPartialSolution;
            this.rootConnection = new DagConnection(nextPartialSolution, ExecutionMode.PIPELINED);
        } else {
            // we have a termination criterion
            SingleRootJoiner singleRootJoiner = new SingleRootJoiner();
            this.rootConnection =
                    new DagConnection(
                            nextPartialSolution, singleRootJoiner, ExecutionMode.PIPELINED);
            this.terminationCriterionRootConnection =
                    new DagConnection(
                            terminationCriterion, singleRootJoiner, ExecutionMode.PIPELINED);

            singleRootJoiner.setInputs(
                    this.rootConnection, this.terminationCriterionRootConnection);

            this.singleRoot = singleRootJoiner;

            // add connection to terminationCriterion for interesting properties visitor
            terminationCriterion.addOutgoingConnection(terminationCriterionRootConnection);
        }

        nextPartialSolution.addOutgoingConnection(rootConnection);
    }

    public int getCostWeight() {
        return this.costWeight;
    }

    public OptimizerNode getSingleRootOfStepFunction() {
        return this.singleRoot;
    }

    // --------------------------------------------------------------------------------------------

    @Override
    public String getOperatorName() {
        return "Bulk Iteration";
    }

    @Override
    public SemanticProperties getSemanticProperties() {
        return new EmptySemanticProperties();
    }

    protected void readStubAnnotations() {}

    @Override
    protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
        this.estimatedOutputSize = getPredecessorNode().getEstimatedOutputSize();
        this.estimatedNumRecords = getPredecessorNode().getEstimatedNumRecords();
    }

    // --------------------------------------------------------------------------------------------
    //                             Properties and Optimization
    // --------------------------------------------------------------------------------------------

    protected List getPossibleProperties() {
        return Collections.singletonList(new NoOpDescriptor());
    }

    @Override
    public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
        final InterestingProperties intProps = getInterestingProperties().clone();

        if (this.terminationCriterion != null) {
            // first propagate through termination Criterion. since it has no successors, it has no
            // interesting properties
            this.terminationCriterionRootConnection.setInterestingProperties(
                    new InterestingProperties());
            this.terminationCriterion.accept(new InterestingPropertyVisitor(estimator));
        }

        // we need to make 2 interesting property passes, because the root of the step function
        // needs also
        // the interesting properties as generated by the partial solution

        // give our own interesting properties (as generated by the iterations successors) to the
        // step function and
        // make the first pass
        this.rootConnection.setInterestingProperties(intProps);
        this.nextPartialSolution.accept(new InterestingPropertyVisitor(estimator));

        // take the interesting properties of the partial solution and add them to the root
        // interesting properties
        InterestingProperties partialSolutionIntProps =
                this.partialSolution.getInterestingProperties();
        intProps.getGlobalProperties().addAll(partialSolutionIntProps.getGlobalProperties());
        intProps.getLocalProperties().addAll(partialSolutionIntProps.getLocalProperties());

        // clear all interesting properties to prepare the second traversal
        // this clears only the path down from the next partial solution. The paths down
        // from the termination criterion (before they meet the paths down from the next partial
        // solution)
        // remain unaffected by this step
        this.rootConnection.clearInterestingProperties();
        this.nextPartialSolution.accept(InterestingPropertiesClearer.INSTANCE);

        // 2nd pass
        this.rootConnection.setInterestingProperties(intProps);
        this.nextPartialSolution.accept(new InterestingPropertyVisitor(estimator));

        // now add the interesting properties of the partial solution to the input
        final InterestingProperties inProps =
                this.partialSolution.getInterestingProperties().clone();
        inProps.addGlobalProperties(new RequestedGlobalProperties());
        inProps.addLocalProperties(new RequestedLocalProperties());
        this.inConn.setInterestingProperties(inProps);
    }

    @Override
    public void clearInterestingProperties() {
        super.clearInterestingProperties();

        this.singleRoot.accept(InterestingPropertiesClearer.INSTANCE);
        this.rootConnection.clearInterestingProperties();
    }

    @Override
    public void computeUnclosedBranchStack() {
        if (this.openBranches != null) {
            return;
        }

        // the resulting branches are those of the step function
        // because the BulkPartialSolution takes the input's branches
        addClosedBranches(getSingleRootOfStepFunction().closedBranchingNodes);
        List result = getSingleRootOfStepFunction().openBranches;

        this.openBranches =
                (result == null || result.isEmpty())
                        ? Collections.emptyList()
                        : result;
    }

    @SuppressWarnings("unchecked")
    @Override
    protected void instantiateCandidate(
            OperatorDescriptorSingle dps,
            Channel in,
            List> broadcastPlanChannels,
            List target,
            CostEstimator estimator,
            RequestedGlobalProperties globPropsReq,
            RequestedLocalProperties locPropsReq) {
        // NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
        // Whenever we instantiate the iteration, we enumerate new candidates for the step function.
        // That way, we make sure we have an appropriate plan for each candidate for the initial
        // partial solution,
        // we have a fitting candidate for the step function (often, work is pushed out of the step
        // function).
        // Among the candidates of the step function, we keep only those that meet the requested
        // properties of the
        // current candidate initial partial solution. That makes sure these properties exist at the
        // beginning of
        // the successive iteration.

        // 1) Because we enumerate multiple times, we may need to clean the cached plans
        //    before starting another enumeration
        this.nextPartialSolution.accept(PlanCacheCleaner.INSTANCE);
        if (this.terminationCriterion != null) {
            this.terminationCriterion.accept(PlanCacheCleaner.INSTANCE);
        }

        // 2) Give the partial solution the properties of the current candidate for the initial
        // partial solution
        this.partialSolution.setCandidateProperties(
                in.getGlobalProperties(), in.getLocalProperties(), in);
        final BulkPartialSolutionPlanNode pspn =
                this.partialSolution.getCurrentPartialSolutionPlanNode();

        // 3) Get the alternative plans
        List candidates = this.nextPartialSolution.getAlternativePlans(estimator);

        // 4) Make sure that the beginning of the step function does not assume properties that
        //    are not also produced by the end of the step function.

        {
            List newCandidates = new ArrayList();

            for (Iterator planDeleter = candidates.iterator(); planDeleter.hasNext(); ) {
                PlanNode candidate = planDeleter.next();

                GlobalProperties atEndGlobal = candidate.getGlobalProperties();
                LocalProperties atEndLocal = candidate.getLocalProperties();

                FeedbackPropertiesMeetRequirementsReport report =
                        candidate.checkPartialSolutionPropertiesMet(pspn, atEndGlobal, atEndLocal);
                if (report == FeedbackPropertiesMeetRequirementsReport.NO_PARTIAL_SOLUTION) {
                    // depends only through broadcast variable on the partial solution
                } else if (report == FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
                    // attach a no-op node through which we create the properties of the original
                    // input
                    Channel toNoOp = new Channel(candidate);
                    globPropsReq.parameterizeChannel(
                            toNoOp, false, rootConnection.getDataExchangeMode(), false);
                    locPropsReq.parameterizeChannel(toNoOp);

                    NoOpUnaryUdfOp noOpUnaryUdfOp = new NoOpUnaryUdfOp<>();
                    noOpUnaryUdfOp.setInput(candidate.getProgramOperator());
                    UnaryOperatorNode rebuildPropertiesNode =
                            new UnaryOperatorNode(
                                    "Rebuild Partial Solution Properties", noOpUnaryUdfOp, true);
                    rebuildPropertiesNode.setParallelism(candidate.getParallelism());

                    SingleInputPlanNode rebuildPropertiesPlanNode =
                            new SingleInputPlanNode(
                                    rebuildPropertiesNode,
                                    "Rebuild Partial Solution Properties",
                                    toNoOp,
                                    DriverStrategy.UNARY_NO_OP);
                    rebuildPropertiesPlanNode.initProperties(
                            toNoOp.getGlobalProperties(), toNoOp.getLocalProperties());
                    estimator.costOperator(rebuildPropertiesPlanNode);

                    GlobalProperties atEndGlobalModified =
                            rebuildPropertiesPlanNode.getGlobalProperties();
                    LocalProperties atEndLocalModified =
                            rebuildPropertiesPlanNode.getLocalProperties();

                    if (!(atEndGlobalModified.equals(atEndGlobal)
                            && atEndLocalModified.equals(atEndLocal))) {
                        FeedbackPropertiesMeetRequirementsReport report2 =
                                candidate.checkPartialSolutionPropertiesMet(
                                        pspn, atEndGlobalModified, atEndLocalModified);

                        if (report2 != FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
                            newCandidates.add(rebuildPropertiesPlanNode);
                        }
                    }

                    planDeleter.remove();
                }
            }

            candidates.addAll(newCandidates);
        }

        if (candidates.isEmpty()) {
            return;
        }

        // 5) Create a candidate for the Iteration Node for every remaining plan of the step
        // function.
        if (terminationCriterion == null) {
            for (PlanNode candidate : candidates) {
                BulkIterationPlanNode node =
                        new BulkIterationPlanNode(
                                this, this.getOperator().getName(), in, pspn, candidate);
                GlobalProperties gProps = candidate.getGlobalProperties().clone();
                LocalProperties lProps = candidate.getLocalProperties().clone();
                node.initProperties(gProps, lProps);
                target.add(node);
            }
        } else if (candidates.size() > 0) {
            List terminationCriterionCandidates =
                    this.terminationCriterion.getAlternativePlans(estimator);

            SingleRootJoiner singleRoot = (SingleRootJoiner) this.singleRoot;

            for (PlanNode candidate : candidates) {
                for (PlanNode terminationCandidate : terminationCriterionCandidates) {
                    if (singleRoot.areBranchCompatible(candidate, terminationCandidate)) {
                        BulkIterationPlanNode node =
                                new BulkIterationPlanNode(
                                        this,
                                        "BulkIteration (" + this.getOperator().getName() + ")",
                                        in,
                                        pspn,
                                        candidate,
                                        terminationCandidate);
                        GlobalProperties gProps = candidate.getGlobalProperties().clone();
                        LocalProperties lProps = candidate.getLocalProperties().clone();
                        node.initProperties(gProps, lProps);
                        target.add(node);
                    }
                }
            }
        }
    }

    // --------------------------------------------------------------------------------------------
    //                      Iteration Specific Traversals
    // --------------------------------------------------------------------------------------------

    public void acceptForStepFunction(Visitor visitor) {
        this.singleRoot.accept(visitor);
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy