All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator Maven / Gradle / Ivy

There is a newer version: 1.20.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.optimizer.plandump;

import org.apache.flink.api.common.operators.CompilerHints;
import org.apache.flink.optimizer.CompilerException;
import org.apache.flink.optimizer.dag.BinaryUnionNode;
import org.apache.flink.optimizer.dag.BulkIterationNode;
import org.apache.flink.optimizer.dag.DataSinkNode;
import org.apache.flink.optimizer.dag.DataSourceNode;
import org.apache.flink.optimizer.dag.OptimizerNode;
import org.apache.flink.optimizer.dag.TempMode;
import org.apache.flink.optimizer.dag.WorksetIterationNode;
import org.apache.flink.optimizer.dataproperties.GlobalProperties;
import org.apache.flink.optimizer.dataproperties.LocalProperties;
import org.apache.flink.optimizer.plan.BulkIterationPlanNode;
import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plan.SingleInputPlanNode;
import org.apache.flink.optimizer.plan.SinkPlanNode;
import org.apache.flink.optimizer.plan.WorksetIterationPlanNode;
import org.apache.flink.optimizer.util.Utils;
import org.apache.flink.runtime.operators.DriverStrategy;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import org.apache.flink.util.StringUtils;

import org.apache.commons.text.StringEscapeUtils;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

public class PlanJSONDumpGenerator {

    private Map, Integer> nodeIds; // resolves pact nodes to ids

    private int nodeCnt;

    private boolean encodeForHTML;

    // --------------------------------------------------------------------------------------------

    /**
     * Selects whether the "contents" value of each dumped node is HTML-escaped.
     *
     * @param encodeForHTML {@code true} to escape node contents for embedding in HTML
     */
    public void setEncodeForHTML(boolean encodeForHTML) {
        this.encodeForHTML = encodeForHTML;
    }

    /**
     * Reports whether node contents are HTML-escaped when a plan is dumped.
     *
     * @return the flag last set through {@link #setEncodeForHTML(boolean)} ({@code false} if it
     *     was never set)
     */
    public boolean isEncodeForHTML() {
        return encodeForHTML;
    }

    public void dumpPactPlanAsJSON(List nodes, PrintWriter writer) {
        @SuppressWarnings("unchecked")
        List> n = (List>) (List) nodes;
        compilePlanToJSON(n, writer);
    }

    /**
     * Renders the (not yet optimized) program DAG reachable from the given sinks as a JSON string.
     *
     * @param nodes the data sink nodes from which the DAG is traversed
     * @return the JSON text
     */
    public String getPactPlanAsJSON(List<DataSinkNode> nodes) {
        StringWriter sw = new StringWriter();
        PrintWriter pw = new PrintWriter(sw);
        dumpPactPlanAsJSON(nodes, pw);
        // close (and thereby flush) the writer before reading the buffer, exactly as
        // getOptimizerPlanAsJSON does
        pw.close();
        return sw.toString();
    }

    /**
     * Writes the optimized plan as JSON into the given file.
     *
     * @param plan the optimized plan to dump
     * @param toFile the file to write; it is created or overwritten
     * @throws IOException if the file cannot be opened or if writing to it fails
     */
    public void dumpOptimizerPlanAsJSON(OptimizedPlan plan, File toFile) throws IOException {
        try (PrintWriter pw = new PrintWriter(new FileOutputStream(toFile), false)) {
            dumpOptimizerPlanAsJSON(plan, pw);

            // PrintWriter never throws on write failures; checkError() flushes the stream and
            // reports whether any write went wrong, so a truncated file is not silently accepted.
            if (pw.checkError()) {
                throw new IOException(
                        "Failed to write the optimizer plan JSON to '" + toFile + "'.");
            }
        }
    }

    /**
     * Renders the optimized plan as a JSON string.
     *
     * @param plan the optimized plan to dump
     * @return the JSON text
     */
    public String getOptimizerPlanAsJSON(OptimizedPlan plan) {
        final StringWriter buffer = new StringWriter();
        final PrintWriter out = new PrintWriter(buffer);

        // write the plan into the in-memory buffer, then release the writer before reading it
        dumpOptimizerPlanAsJSON(plan, out);
        out.close();

        final String json = buffer.toString();
        return json;
    }

    /**
     * Writes the optimized plan as JSON to the given writer.
     *
     * @param plan the optimized plan whose data sinks are the traversal roots
     * @param writer the writer receiving the JSON text; it is neither flushed nor closed here
     */
    public void dumpOptimizerPlanAsJSON(OptimizedPlan plan, PrintWriter writer) {
        final Collection<SinkPlanNode> sinks = plan.getDataSinks();

        // reuse the collection when it already is a list, otherwise copy it into one
        final List<SinkPlanNode> sinkList =
                sinks instanceof List
                        ? (List<SinkPlanNode>) sinks
                        : new ArrayList<SinkPlanNode>(sinks);

        dumpOptimizerPlanAsJSON(sinkList, writer);
    }

    public void dumpOptimizerPlanAsJSON(List nodes, PrintWriter writer) {
        @SuppressWarnings("unchecked")
        List> n = (List>) (List) nodes;
        compilePlanToJSON(n, writer);
    }

    // --------------------------------------------------------------------------------------------

    private void compilePlanToJSON(List> nodes, PrintWriter writer) {
        // initialization to assign node ids
        this.nodeIds = new HashMap, Integer>();
        this.nodeCnt = 0;

        // JSON header
        writer.print("{\n\t\"nodes\": [\n\n");

        // Generate JSON for plan
        for (int i = 0; i < nodes.size(); i++) {
            visit(nodes.get(i), writer, i == 0);
        }

        // JSON Footer
        writer.println("\n\t]\n}");
    }

    private boolean visit(DumpableNode node, PrintWriter writer, boolean first) {
        // check for duplicate traversal
        if (this.nodeIds.containsKey(node)) {
            return false;
        }

        // assign an id first
        this.nodeIds.put(node, this.nodeCnt++);

        // then recurse
        for (DumpableNode child : node.getPredecessors()) {
            // This is important, because when the node was already in the graph it is not allowed
            // to set first to false!
            if (visit(child, writer, first)) {
                first = false;
            }
        }

        // check if this node should be skipped from the dump
        final OptimizerNode n = node.getOptimizerNode();

        // ------------------ dump after the ascend ---------------------
        // start a new node and output node id
        if (!first) {
            writer.print(",\n");
        }
        // open the node
        writer.print("\t{\n");

        // recurse, it is is an iteration node
        if (node instanceof BulkIterationNode || node instanceof BulkIterationPlanNode) {

            DumpableNode innerChild =
                    node instanceof BulkIterationNode
                            ? ((BulkIterationNode) node).getNextPartialSolution()
                            : ((BulkIterationPlanNode) node).getRootOfStepFunction();

            DumpableNode begin =
                    node instanceof BulkIterationNode
                            ? ((BulkIterationNode) node).getPartialSolution()
                            : ((BulkIterationPlanNode) node).getPartialSolutionPlanNode();

            writer.print("\t\t\"step_function\": [\n");

            visit(innerChild, writer, true);

            writer.print("\n\t\t],\n");
            writer.print("\t\t\"partial_solution\": " + this.nodeIds.get(begin) + ",\n");
            writer.print("\t\t\"next_partial_solution\": " + this.nodeIds.get(innerChild) + ",\n");
        } else if (node instanceof WorksetIterationNode
                || node instanceof WorksetIterationPlanNode) {

            DumpableNode worksetRoot =
                    node instanceof WorksetIterationNode
                            ? ((WorksetIterationNode) node).getNextWorkset()
                            : ((WorksetIterationPlanNode) node).getNextWorkSetPlanNode();
            DumpableNode solutionDelta =
                    node instanceof WorksetIterationNode
                            ? ((WorksetIterationNode) node).getSolutionSetDelta()
                            : ((WorksetIterationPlanNode) node).getSolutionSetDeltaPlanNode();

            DumpableNode workset =
                    node instanceof WorksetIterationNode
                            ? ((WorksetIterationNode) node).getWorksetNode()
                            : ((WorksetIterationPlanNode) node).getWorksetPlanNode();
            DumpableNode solutionSet =
                    node instanceof WorksetIterationNode
                            ? ((WorksetIterationNode) node).getSolutionSetNode()
                            : ((WorksetIterationPlanNode) node).getSolutionSetPlanNode();

            writer.print("\t\t\"step_function\": [\n");

            visit(worksetRoot, writer, true);
            visit(solutionDelta, writer, false);

            writer.print("\n\t\t],\n");
            writer.print("\t\t\"workset\": " + this.nodeIds.get(workset) + ",\n");
            writer.print("\t\t\"solution_set\": " + this.nodeIds.get(solutionSet) + ",\n");
            writer.print("\t\t\"next_workset\": " + this.nodeIds.get(worksetRoot) + ",\n");
            writer.print("\t\t\"solution_delta\": " + this.nodeIds.get(solutionDelta) + ",\n");
        }

        // print the id
        writer.print("\t\t\"id\": " + this.nodeIds.get(node));

        final String type;
        String contents;
        if (n instanceof DataSinkNode) {
            type = "sink";
            contents = n.getOperator().toString();
        } else if (n instanceof DataSourceNode) {
            type = "source";
            contents = n.getOperator().toString();
        } else if (n instanceof BulkIterationNode) {
            type = "bulk_iteration";
            contents = n.getOperator().getName();
        } else if (n instanceof WorksetIterationNode) {
            type = "workset_iteration";
            contents = n.getOperator().getName();
        } else if (n instanceof BinaryUnionNode) {
            type = "pact";
            contents = "";
        } else {
            type = "pact";
            contents = n.getOperator().getName();
        }

        contents = StringUtils.showControlCharacters(contents);
        if (encodeForHTML) {
            contents = StringEscapeUtils.escapeHtml4(contents);
            contents = contents.replace("\\", "\");
        }

        String name = n.getOperatorName();
        if (name.equals("Reduce")
                && (node instanceof SingleInputPlanNode)
                && ((SingleInputPlanNode) node).getDriverStrategy()
                        == DriverStrategy.SORTED_GROUP_COMBINE) {
            name = "Combine";
        }

        // output the type identifier
        writer.print(",\n\t\t\"type\": \"" + type + "\"");

        // output node name
        writer.print(",\n\t\t\"pact\": \"" + name + "\"");

        // output node contents
        writer.print(",\n\t\t\"contents\": \"" + contents + "\"");

        // parallelism
        writer.print(
                ",\n\t\t\"parallelism\": \""
                        + (n.getParallelism() >= 1 ? n.getParallelism() : "default")
                        + "\"");

        // output node predecessors
        Iterator> inConns = node.getDumpableInputs().iterator();
        String child1name = "", child2name = "";

        if (inConns != null && inConns.hasNext()) {
            // start predecessor list
            writer.print(",\n\t\t\"predecessors\": [");
            int inputNum = 0;

            while (inConns.hasNext()) {
                final DumpableConnection inConn = inConns.next();
                final DumpableNode source = inConn.getSource();
                writer.print(inputNum == 0 ? "\n" : ",\n");
                if (inputNum == 0) {
                    child1name += child1name.length() > 0 ? ", " : "";
                    child1name +=
                            source.getOptimizerNode().getOperator().getName()
                                    + " (id: "
                                    + this.nodeIds.get(source)
                                    + ")";
                } else if (inputNum == 1) {
                    child2name += child2name.length() > 0 ? ", " : "";
                    child2name +=
                            source.getOptimizerNode().getOperator().getName()
                                    + " (id: "
                                    + this.nodeIds.get(source)
                                    + ")";
                }

                // output predecessor id
                writer.print("\t\t\t{\"id\": " + this.nodeIds.get(source));

                // output connection side
                if (inConns.hasNext() || inputNum > 0) {
                    writer.print(", \"side\": \"" + (inputNum == 0 ? "first" : "second") + "\"");
                }
                // output shipping strategy and channel type
                final Channel channel = (inConn instanceof Channel) ? (Channel) inConn : null;
                final ShipStrategyType shipType =
                        channel != null ? channel.getShipStrategy() : inConn.getShipStrategy();

                String shipStrategy = null;
                if (shipType != null) {
                    switch (shipType) {
                        case NONE:
                            // nothing
                            break;
                        case FORWARD:
                            shipStrategy = "Forward";
                            break;
                        case BROADCAST:
                            shipStrategy = "Broadcast";
                            break;
                        case PARTITION_HASH:
                            shipStrategy = "Hash Partition";
                            break;
                        case PARTITION_RANGE:
                            shipStrategy = "Range Partition";
                            break;
                        case PARTITION_RANDOM:
                            shipStrategy = "Redistribute";
                            break;
                        case PARTITION_FORCED_REBALANCE:
                            shipStrategy = "Rebalance";
                            break;
                        case PARTITION_CUSTOM:
                            shipStrategy = "Custom Partition";
                            break;
                        default:
                            throw new CompilerException(
                                    "Unknown ship strategy '"
                                            + inConn.getShipStrategy().name()
                                            + "' in JSON generator.");
                    }
                }

                if (channel != null
                        && channel.getShipStrategyKeys() != null
                        && channel.getShipStrategyKeys().size() > 0) {
                    shipStrategy +=
                            " on "
                                    + (channel.getShipStrategySortOrder() == null
                                            ? channel.getShipStrategyKeys().toString()
                                            : Utils.createOrdering(
                                                            channel.getShipStrategyKeys(),
                                                            channel.getShipStrategySortOrder())
                                                    .toString());
                }

                if (shipStrategy != null) {
                    writer.print(", \"ship_strategy\": \"" + shipStrategy + "\"");
                }

                if (channel != null) {
                    String localStrategy = null;
                    switch (channel.getLocalStrategy()) {
                        case NONE:
                            break;
                        case SORT:
                            localStrategy = "Sort";
                            break;
                        case COMBININGSORT:
                            localStrategy = "Sort (combining)";
                            break;
                        default:
                            throw new CompilerException(
                                    "Unknown local strategy " + channel.getLocalStrategy().name());
                    }

                    if (channel != null
                            && channel.getLocalStrategyKeys() != null
                            && channel.getLocalStrategyKeys().size() > 0) {
                        localStrategy +=
                                " on "
                                        + (channel.getLocalStrategySortOrder() == null
                                                ? channel.getLocalStrategyKeys().toString()
                                                : Utils.createOrdering(
                                                                channel.getLocalStrategyKeys(),
                                                                channel.getLocalStrategySortOrder())
                                                        .toString());
                    }

                    if (localStrategy != null) {
                        writer.print(", \"local_strategy\": \"" + localStrategy + "\"");
                    }

                    if (channel != null && channel.getTempMode() != TempMode.NONE) {
                        String tempMode = channel.getTempMode().toString();
                        writer.print(", \"temp_mode\": \"" + tempMode + "\"");
                    }

                    if (channel != null) {
                        String exchangeMode = channel.getDataExchangeMode().toString();
                        writer.print(", \"exchange_mode\": \"" + exchangeMode + "\"");
                    }
                }

                writer.print('}');
                inputNum++;
            }
            // finish predecessors
            writer.print("\n\t\t]");
        }

        // ---------------------------------------------------------------------------------------
        // the part below here is relevant only to plan nodes with concrete strategies, etc
        // ---------------------------------------------------------------------------------------

        final PlanNode p = node.getPlanNode();
        if (p == null) {
            // finish node
            writer.print("\n\t}");
            return true;
        }
        // local strategy
        String locString = null;
        if (p.getDriverStrategy() != null) {
            switch (p.getDriverStrategy()) {
                case NONE:
                case BINARY_NO_OP:
                    break;

                case UNARY_NO_OP:
                    locString = "No-Op";
                    break;

                case MAP:
                    locString = "Map";
                    break;

                case FLAT_MAP:
                    locString = "FlatMap";
                    break;

                case MAP_PARTITION:
                    locString = "Map Partition";
                    break;

                case ALL_REDUCE:
                    locString = "Reduce All";
                    break;

                case ALL_GROUP_REDUCE:
                case ALL_GROUP_REDUCE_COMBINE:
                    locString = "Group Reduce All";
                    break;

                case SORTED_REDUCE:
                    locString = "Sorted Reduce";
                    break;

                case SORTED_PARTIAL_REDUCE:
                    locString = "Sorted Combine/Reduce";
                    break;

                case SORTED_GROUP_REDUCE:
                    locString = "Sorted Group Reduce";
                    break;

                case SORTED_GROUP_COMBINE:
                    locString = "Sorted Combine";
                    break;

                case HYBRIDHASH_BUILD_FIRST:
                    locString = "Hybrid Hash (build: " + child1name + ")";
                    break;
                case HYBRIDHASH_BUILD_SECOND:
                    locString = "Hybrid Hash (build: " + child2name + ")";
                    break;

                case HYBRIDHASH_BUILD_FIRST_CACHED:
                    locString = "Hybrid Hash (CACHED) (build: " + child1name + ")";
                    break;
                case HYBRIDHASH_BUILD_SECOND_CACHED:
                    locString = "Hybrid Hash (CACHED) (build: " + child2name + ")";
                    break;

                case NESTEDLOOP_BLOCKED_OUTER_FIRST:
                    locString = "Nested Loops (Blocked Outer: " + child1name + ")";
                    break;
                case NESTEDLOOP_BLOCKED_OUTER_SECOND:
                    locString = "Nested Loops (Blocked Outer: " + child2name + ")";
                    break;
                case NESTEDLOOP_STREAMED_OUTER_FIRST:
                    locString = "Nested Loops (Streamed Outer: " + child1name + ")";
                    break;
                case NESTEDLOOP_STREAMED_OUTER_SECOND:
                    locString = "Nested Loops (Streamed Outer: " + child2name + ")";
                    break;

                case INNER_MERGE:
                    locString = "Merge";
                    break;

                case CO_GROUP:
                    locString = "Co-Group";
                    break;

                default:
                    locString = p.getDriverStrategy().name();
                    break;
            }

            if (locString != null) {
                writer.print(",\n\t\t\"driver_strategy\": \"");
                writer.print(locString);
                writer.print("\"");
            }
        }

        {
            // output node global properties
            final GlobalProperties gp = p.getGlobalProperties();

            writer.print(",\n\t\t\"global_properties\": [\n");

            addProperty(writer, "Partitioning", gp.getPartitioning().name(), true);
            if (gp.getPartitioningFields() != null) {
                addProperty(writer, "Partitioned on", gp.getPartitioningFields().toString(), false);
            }
            if (gp.getPartitioningOrdering() != null) {
                addProperty(
                        writer,
                        "Partitioning Order",
                        gp.getPartitioningOrdering().toString(),
                        false);
            } else {
                addProperty(writer, "Partitioning Order", "(none)", false);
            }
            if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
                addProperty(writer, "Uniqueness", "not unique", false);
            } else {
                addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
            }

            writer.print("\n\t\t]");
        }

        {
            // output node local properties
            LocalProperties lp = p.getLocalProperties();

            writer.print(",\n\t\t\"local_properties\": [\n");

            if (lp.getOrdering() != null) {
                addProperty(writer, "Order", lp.getOrdering().toString(), true);
            } else {
                addProperty(writer, "Order", "(none)", true);
            }
            if (lp.getGroupedFields() != null && lp.getGroupedFields().size() > 0) {
                addProperty(writer, "Grouped on", lp.getGroupedFields().toString(), false);
            } else {
                addProperty(writer, "Grouping", "not grouped", false);
            }
            if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
                addProperty(writer, "Uniqueness", "not unique", false);
            } else {
                addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
            }

            writer.print("\n\t\t]");
        }

        // output node size estimates
        writer.print(",\n\t\t\"estimates\": [\n");

        addProperty(
                writer,
                "Est. Output Size",
                n.getEstimatedOutputSize() == -1
                        ? "(unknown)"
                        : formatNumber(n.getEstimatedOutputSize(), "B"),
                true);
        addProperty(
                writer,
                "Est. Cardinality",
                n.getEstimatedNumRecords() == -1
                        ? "(unknown)"
                        : formatNumber(n.getEstimatedNumRecords()),
                false);

        writer.print("\t\t]");

        // output node cost
        if (p.getNodeCosts() != null) {
            writer.print(",\n\t\t\"costs\": [\n");

            addProperty(
                    writer,
                    "Network",
                    p.getNodeCosts().getNetworkCost() == -1
                            ? "(unknown)"
                            : formatNumber(p.getNodeCosts().getNetworkCost(), "B"),
                    true);
            addProperty(
                    writer,
                    "Disk I/O",
                    p.getNodeCosts().getDiskCost() == -1
                            ? "(unknown)"
                            : formatNumber(p.getNodeCosts().getDiskCost(), "B"),
                    false);
            addProperty(
                    writer,
                    "CPU",
                    p.getNodeCosts().getCpuCost() == -1
                            ? "(unknown)"
                            : formatNumber(p.getNodeCosts().getCpuCost(), ""),
                    false);

            addProperty(
                    writer,
                    "Cumulative Network",
                    p.getCumulativeCosts().getNetworkCost() == -1
                            ? "(unknown)"
                            : formatNumber(p.getCumulativeCosts().getNetworkCost(), "B"),
                    false);
            addProperty(
                    writer,
                    "Cumulative Disk I/O",
                    p.getCumulativeCosts().getDiskCost() == -1
                            ? "(unknown)"
                            : formatNumber(p.getCumulativeCosts().getDiskCost(), "B"),
                    false);
            addProperty(
                    writer,
                    "Cumulative CPU",
                    p.getCumulativeCosts().getCpuCost() == -1
                            ? "(unknown)"
                            : formatNumber(p.getCumulativeCosts().getCpuCost(), ""),
                    false);

            writer.print("\n\t\t]");
        }

        // output the node compiler hints
        if (n.getOperator().getCompilerHints() != null) {
            CompilerHints hints = n.getOperator().getCompilerHints();
            CompilerHints defaults = new CompilerHints();

            String size =
                    hints.getOutputSize() == defaults.getOutputSize()
                            ? "(none)"
                            : String.valueOf(hints.getOutputSize());
            String card =
                    hints.getOutputCardinality() == defaults.getOutputCardinality()
                            ? "(none)"
                            : String.valueOf(hints.getOutputCardinality());
            String width =
                    hints.getAvgOutputRecordSize() == defaults.getAvgOutputRecordSize()
                            ? "(none)"
                            : String.valueOf(hints.getAvgOutputRecordSize());
            String filter =
                    hints.getFilterFactor() == defaults.getFilterFactor()
                            ? "(none)"
                            : String.valueOf(hints.getFilterFactor());

            writer.print(",\n\t\t\"compiler_hints\": [\n");

            addProperty(writer, "Output Size (bytes)", size, true);
            addProperty(writer, "Output Cardinality", card, false);
            addProperty(writer, "Avg. Output Record Size (bytes)", width, false);
            addProperty(writer, "Filter Factor", filter, false);

            writer.print("\t\t]");
        }

        // finish node
        writer.print("\n\t}");
        return true;
    }

    /**
     * Writes one {@code { "name": ..., "value": ... }} entry of a property array.
     *
     * <p>Name and value are written verbatim; no JSON escaping is applied.
     *
     * @param writer the writer receiving the JSON text
     * @param name the property name
     * @param value the property value
     * @param first {@code true} for the first entry of the array, which needs no leading comma
     */
    private void addProperty(PrintWriter writer, String name, String value, boolean first) {
        final String separator = first ? "" : ",\n";
        final String entry =
                "\t\t\t{ \"name\": \"" + name + "\", \"value\": \"" + value + "\" }";

        writer.print(separator + entry);
    }

    /**
     * Formats a number with two decimals, scaled to thousands groups (K, M, G, T).
     *
     * @param number the number to format
     * @return the formatted number; see {@link #formatNumber(double, String)}
     */
    public static String formatNumber(double number) {
        return formatNumber(number, "");
    }

    /**
     * Formats a number with two decimals, scaled down by a power of 1000 and followed by the
     * matching magnitude character (K, M, G or T).
     *
     * <p>Values that are zero or negative are returned as {@code String.valueOf(number)}. Values
     * up to 1000 are printed without a magnitude character. Values beyond the T range keep the T
     * character and a correspondingly larger mantissa.
     *
     * @param number the number to format
     * @param suffix accepted for callers that pass a unit, but not part of the result
     * @return the formatted number, always using {@link Locale#US} conventions
     */
    public static String formatNumber(double number, String suffix) {
        if (number <= 0.0) {
            return String.valueOf(number);
        }

        final int magnitude = (int) Math.ceil(Math.log10(number));

        // one group per three decimal digits, clamped to the range of available characters
        final int group = Math.min(Math.max((magnitude - 1) / 3, 0), SIZE_SUFFIXES.length - 1);

        // scale down by 10^(3 * group), one decimal digit at a time
        for (int remaining = group * 3; remaining > 0; remaining--) {
            number /= 10;
        }

        if (group == 0) {
            return String.format(Locale.US, "%.2f", number);
        }
        return String.format(Locale.US, "%.2f %s", number, SIZE_SUFFIXES[group]);
    }

    /** Magnitude characters indexed by thousands group; index 0 (no scaling) is never printed. */
    private static final char[] SIZE_SUFFIXES = {0, 'K', 'M', 'G', 'T'};
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy