All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.table.util.PlanUtil.scala Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.table.util

import org.apache.flink.api.common.JobExecutionResult
import org.apache.flink.streaming.api.graph.StreamGraph
import org.apache.flink.table.plan.nodes.exec.ExecNode
import org.apache.flink.table.plan.util.FlinkNodeOptUtil
import org.apache.flink.table.runtime.AbstractStreamOperatorWithMetrics._
import org.apache.flink.table.util.PlanUtil._

import org.apache.calcite.sql.SqlExplainLevel
import org.json.JSONObject

import java.io.{PrintWriter, StringWriter}
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths, StandardOpenOption}

import scala.collection.JavaConversions._
import scala.collection.mutable

/**
  * The class is to convert [[StreamGraph]] to plan with metrics.
  *
  * @param streamGraph the stream graph to convert
  */
class PlanWihMetrics(streamGraph: StreamGraph) {

  /**
    * Dump plan with accumulated operator metrics to the given file.
    *
    * @param dumpFilePath file path to dump the stream graph plans with operator metrics.
    * @param jobResult    job result of stream graph
    */
  def dumpPlanWithMetrics(dumpFilePath: String, jobResult: JobExecutionResult): Unit = {
    val planWithMetrics = getPlanWithMetrics(jobResult)
    writeContentToFile(planWithMetrics, dumpFilePath)
  }

  /**
    * Get plan with accumulated operator metrics.
    *
    * @param jobResult job result of stream graph
    * @return plan with accumulated operator metrics.
    */
  def getPlanWithMetrics(jobResult: JobExecutionResult): String = {
    val operatorMetrics = extractMetricAccFromResult(jobResult)
    val sw = new StringWriter()
    val pw = new PrintWriter(sw)
    streamGraph.getSinkIDs.foreach { sinkId =>
      print(sinkId, operatorMetrics, pw)
    }
    pw.close()
    val planWithMetrics = sw.toString
    if (LOG.isDebugEnabled) {
      val jobName = streamGraph.getJobName
      LOG.debug(s"planWithMetrics of Job [$jobName] : \n $planWithMetrics")
    }
    planWithMetrics
  }

  /**
    * Extracts the operator metrics from the accumulators in job result.
    *
    * @param jobResult job result of stream graph
    * @return the operator metrics from the accumulators in job result.
    */
  private def extractMetricAccFromResult(
      jobResult: JobExecutionResult): mutable.Map[Integer, AnyRef] =
    jobResult
      .getAllAccumulatorResults
      .filter(_._1.startsWith(ACCUMULATOR_PREFIX))
      .map { case (accName, accValue) =>
        val nodeID = new Integer(accName.drop(ACCUMULATOR_PREFIX.length))
        (nodeID, accValue)
      }

  private def print(
      vertexID: Int,
      operatorMetrics: mutable.Map[Integer, AnyRef],
      pw: PrintWriter,
      lastChildren: Seq[Boolean] = Nil): Unit = {
    val s = new StringBuilder
    if (lastChildren.nonEmpty) {
      lastChildren.init.foreach { isLast =>
        s.append(if (isLast) "   " else ":  ")
      }
      s.append(if (lastChildren.last) "+- " else ":- ")
    }
    val streamNode = streamGraph.getStreamNode(vertexID)
    val nodeType = if (streamGraph.getSourceIDs.contains(vertexID)) {
      "Data Source"
    } else if (streamGraph.getSinkIDs.contains(vertexID)) {
      "Data Sink"
    } else {
      "Operator"
    }
    s.append(s"Stage $vertexID : $nodeType(${streamNode.getOperatorName}")
    operatorMetrics.get(vertexID) match {
      case Some(metric) =>
        val metricJson = new JSONObject
        // TODO collect more stats
        metricJson.put("rowCount", metric)
        s.append(s" , metric=$metricJson")
      case None =>
    }
    s.append(")")
    pw.println(s)
    val inputNodes = streamNode.getInEdges.map(_.getSourceId)
    if (inputNodes.size() > 1) {
      inputNodes.init.foreach { n =>
        print(n, operatorMetrics, pw, lastChildren :+ false)
      }
    }
    if (inputNodes.nonEmpty) {
      print(inputNodes.last, operatorMetrics, pw, lastChildren :+ true)
    }
  }

}

object PlanUtil extends Logging {

  implicit def toPlanWihMetrics(streamGraph: StreamGraph): PlanWihMetrics =
    new PlanWihMetrics(streamGraph)

  /**
    * Dump ExecNodes into file.
    *
    * @param nodes        ExecNodes to dump
    * @param dumpFilePath file path to write
    */
  def dumpExecNodes(nodes: Seq[ExecNode[_, _]], dumpFilePath: String): Unit = {
    val nodeStr = FlinkNodeOptUtil.dagToString(
      nodes,
      SqlExplainLevel.ALL_ATTRIBUTES,
      withRelNodeId = true,
      withMemCost = true)
    writeContentToFile(nodeStr, dumpFilePath)
    if (LOG.isDebugEnabled) {
      LOG.debug(s"dump ExecNode: \n $nodeStr")
    }
  }

  /**
    * Write content into file.
    * Create a new file if it does not exist.
    * Overwrite the original file if it already exists.
    *
    * @param content  content to dump
    * @param filePath file path to write
    */
  def writeContentToFile(content: String, filePath: String): Unit = {
    require(content != null, "content can not be null! ")
    require(filePath != null && filePath.nonEmpty, "filePath can not be null or empty! ")
    val path = Paths.get(filePath)
    Files.deleteIfExists(path)
    Files.write(
      path,
      Seq(content),
      StandardCharsets.UTF_8,
      StandardOpenOption.CREATE,
      StandardOpenOption.WRITE)
  }

  /**
    * Converting an StreamGraph to a human-readable string.
    *
    * @param graph stream graph
    */
  def explainPlan(graph: StreamGraph): String = {
    def isSource(id: Int): Boolean = graph.getSourceIDs.contains(id)

    def isSink(id: Int): Boolean = graph.getSinkIDs.contains(id)

    // can not convert to single abstract method because it will throw compile error
    implicit val order: Ordering[Int] = new Ordering[Int] {
      override def compare(x: Int, y: Int): Int = (isSink(x), isSink(y)) match {
        case (true, false) => 1
        case (false, true) => -1
        case (_, _) => x - y
      }
    }

    val operatorIDs = graph.getStreamNodes.map(_.getId).toList.sorted(order)
    val sw = new StringWriter()
    val pw = new PrintWriter(sw)

    var tabs = 0
    operatorIDs.foreach { id =>
      val op = graph.getStreamNode(id)
      val (nodeType, content) = if (isSource(id)) {
        tabs = 0
        ("Data Source", "collect elements with CollectionInputFormat")
      } else if (isSink(id)) {
        ("Data Sink", op.getOperatorName)
      } else {
        ("Operator", op.getOperatorName)
      }

      pw.append("\t" * tabs).append(s"Stage $id : $nodeType\n")
        .append("\t" * (tabs + 1)).append(s"content : $content\n")

      if (!isSource(id)) {
        val partition = op.getInEdges.head.getPartitioner.toString
        pw.append("\t" * (tabs + 1)).append(s"ship_strategy : $partition\n")
      }

      pw.append("\n")
      tabs += 1
    }

    pw.close()
    sw.toString
  }

}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy