All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.allenai.pipeline.Workflow.scala Maven / Gradle / Ivy

The newest version!
package org.allenai.pipeline

import org.allenai.common.Resource

import spray.json._
import spray.json.DefaultJsonProtocol._

import scala.io.Source

import java.net.URI

/** DAG representation of the execution of a set of Producers.
  *
  * @param nodes the steps of the pipeline, keyed by node id
  * @param links directed edges between node ids; each edge runs from a dependency
  *   (`fromId`) to the step that depends on it (`toId`)
  */
case class Workflow(nodes: Map[String, Node], links: Iterable[Link]) {
  /** Nodes that no link points to, i.e. steps without any recorded dependency. */
  def sourceNodes(): Map[String, Node] = {
    // Gather the edge targets once so each node costs a set lookup instead of a
    // scan over every link.
    val targetIds = links.iterator.map(_.toId).toSet
    nodes.filter { case (nodeId, _) => !targetIds(nodeId) }
  }

  /** Nodes that no link originates from, i.e. steps nothing else depends on. */
  def sinkNodes(): Map[String, Node] = {
    val originIds = links.iterator.map(_.fromId).toSet
    nodes.filter { case (nodeId, _) => !originIds(nodeId) }
  }
}

/** Represents a Producer instance with PipelineRunnerSupport
  *
  * @param info code metadata for the step; the HTML rendering in this file reads its
  *   `className`, `srcUrl` and `buildId`
  * @param params parameter name/value pairs, taken from the step's signature
  * @param outputPath location of the step's output, when the step reports one
  */
case class Node(
  info: CodeInfo,
  params: Map[String, String],
  outputPath: Option[URI]
)

/** Represents dependency between Producer instances
  *
  * @param fromId id of the node that is depended upon
  * @param toId id of the node that has the dependency
  * @param name name under which the dependent node's signature lists the dependency
  */
case class Link(fromId: String, toId: String, name: String)

object Workflow {
  private type PRS = PipelineRunnerSupport // Reduce line length

  /** Builds the Workflow graph reachable from the given pipeline steps.
    *
    * Every step and everything it transitively depends on becomes a node keyed by its
    * signature id; every dependency becomes a link from the dependency to the dependent
    * step. A step reachable along several paths is visited once per path; the duplicates
    * collapse when the results are turned into a map (later entries replace earlier ones
    * with the same id) and a set.
    *
    * @param steps the steps whose dependency graphs should be included
    * @return the combined graph of all given steps
    */
  def forPipeline(steps: PRS*): Workflow = {
    // The step itself, followed by everything it transitively depends on.
    def reachable(step: PRS): Iterable[PRS] = {
      val upstream = step.signature.dependencies.flatMap { case (_, dep) => reachable(dep) }
      Seq(step) ++ upstream
    }

    // Edges as (dependency, dependent, dependency name): direct ones first, deeper ones after.
    def edgesBelow(step: PRS): Iterable[(PRS, PRS, String)] = {
      val direct = step.signature.dependencies.map { case (name, dep) => (dep, step, name) }
      val indirect = step.signature.dependencies.flatMap { case (_, dep) => edgesBelow(dep) }
      direct ++ indirect
    }

    val nodes = steps
      .flatMap(reachable)
      .map { stepInfo =>
        val sig = stepInfo.signature
        val codeInfo = stepInfo.codeInfo
        sig.id -> Node(codeInfo, sig.parameters, stepInfo.outputLocation)
      }
      .toMap

    val links = steps
      .flatMap(edgesBelow)
      .map { case (from, to, name) => Link(from.signature.id, to.signature.id, name) }
      .toSet

    Workflow(nodes, links)
  }

  /** spray-json format for [[Workflow]], assembled from formats for Node and Link that are
    * only visible inside this initializer.
    *
    * The result types are written out explicitly: implicit definitions with inferred types
    * make implicit resolution depend on declaration order and are rejected by Scala 3.
    */
  implicit val jsFormat: RootJsonFormat[Workflow] = {
    // Brings CodeInfo's members into scope; presumably this supplies the JsonFormat
    // needed for Node.info -- verify against CodeInfo's companion.
    import CodeInfo._
    implicit val nodeFormat: RootJsonFormat[Node] = jsonFormat3(Node)
    implicit val linkFormat: RootJsonFormat[Link] = jsonFormat3(Link)
    jsonFormat2(Workflow.apply)
  }

  /** Turns a URI into a link string for the rendered page.
    *
    * An `s3://bucket/path` URI becomes `http://bucket.s3.amazonaws.com/path` (only host and
    * path are carried over); any other URI, including one without a scheme, is returned
    * through `toString` unchanged.
    */
  private def link(uri: URI) =
    if (uri.getScheme == "s3") {
      val s3Host = s"${uri.getHost}.s3.amazonaws.com"
      // Four-argument URI constructor: scheme, host, path, fragment (no fragment here).
      new java.net.URI("http", s3Host, uri.getPath, null).toString
    } else {
      uri.toString
    }

  private val DEFAULT_MAX_SIZE = 40
  private val LHS_MAX_SIZE = 15

  /** Shortens `s` to at most `maxLength` characters by replacing its middle with "...".
    *
    * Strings that already fit (including those of exactly `maxLength` characters) are
    * returned unchanged. Otherwise the result keeps a prefix of at most `LHS_MAX_SIZE`
    * characters (and at most a third of `maxLength`), the ellipsis, and as much of the
    * tail as still fits, so the result is exactly `maxLength` characters long.
    *
    * @param s the text to shorten
    * @param maxLength upper bound on the length of the result
    * @return `s` itself, or its elided form
    */
  private def limitLength(s: String, maxLength: Int = DEFAULT_MAX_SIZE) = {
    val ellipsis = "..."
    if (s.size <= maxLength) {
      s
    } else if (maxLength <= ellipsis.length) {
      // No room for an ellipsis plus content: plain truncation (empty for maxLength <= 0).
      s.take(maxLength)
    } else {
      val leftSize = math.min(LHS_MAX_SIZE, maxLength / 3)
      // The ellipsis counts against the budget; for maxLength >= 4 this is never negative.
      val rightSize = maxLength - ellipsis.length - leftSize
      s"${s.take(leftSize)}$ellipsis${s.takeRight(rightSize)}"
    }
  }

  /** Renders the workflow as an HTML page based on the `pipelineSummary.html` resource.
    *
    * The template is filled, in order, with: an HTML list linking to every node that has
    * an output path, the JavaScript statements that add the nodes to the graph, and the
    * JavaScript statements that add the edges.
    *
    * @param w the workflow to render
    * @return the filled-in template
    * @throws IllegalArgumentException if the template resource cannot be found
    */
  def renderHtml(w: Workflow): String = {
    // Quoted, escaped JavaScript string literal. JSON string syntax is valid JavaScript, so
    // quotes, backslashes and control characters in a value can no longer break the script.
    def js(s: String): String = JsString(s).compactPrint

    val sourceNodes = w.sourceNodes()
    val sinkNodes = w.sinkNodes()
    // Collect nodes with output paths to be displayed in the upper-left.
    val outputNodeLinks = for {
      (_, Node(info, _, outputPath)) <- w.nodes.toList
      path <- outputPath
    } yield {
      s"""<a href="${link(path)}">${info.className}</a>"""
    }
    val addNodes =
      for ((id, Node(info, params, outputPath)) <- w.nodes) yield {
        // Params show up as line items in the pipeline diagram node.
        val paramsText = params.toList.map {
          case (key, value) =>
            js(s"$key=${limitLength(value)}")
        }.mkString(",")
        // A link is like a param but it hyperlinks somewhere.
        // An optional link to the source data.
        val srcLink = info.srcUrl.map { uri =>
          val label = s"v${info.buildId}"
          s"new Link(${js(link(uri))},${js(label)})"
        }
        // An optional link to the output data.
        val outputLink = outputPath.map(uri => s"""new Link(${js(link(uri))},"output")""")
        val linksText = (srcLink ++ outputLink).mkString(",")
        // A node that is both a source and a sink is styled as a source.
        val clazz =
          if (sourceNodes.contains(id)) "sourceNode"
          else if (sinkNodes.contains(id)) "sinkNode"
          else ""
        s"""        g.setNode(${js(id)}, {
           |          class: "$clazz",
           |          labelType: "html",
           |          label: generateStepContent(${js(info.className)},
           |            [$paramsText],
           |            [$linksText])
           |        });""".stripMargin
      }
    val addEdges =
      for (Link(from, to, name) <- w.links) yield {
        s"""        g.setEdge(${js(from)}, ${js(to)}, {label: ${js(name)}}); """
      }

    val resourceName = "pipelineSummary.html"
    val resourceUrl = this.getClass.getResource(resourceName)
    require(resourceUrl != null, s"Could not find resource: ${resourceName}")
    val template = Resource.using(Source.fromURL(resourceUrl)) { source =>
      source.mkString
    }
    val outputNodeHtml =
      outputNodeLinks.map("<li>" + _ + "</li>").mkString("<ul>", "\n", "</ul>")
    template.format(outputNodeHtml, addNodes.mkString("\n\n"), addEdges.mkString("\n\n"))
  }
}




    © 2015 - 2024 Weber Informatics LLC | Privacy Policy