All downloads are free. The search and download features use the official Maven repository.

com.spotify.scio.ScioResult.scala Maven / Gradle / Ivy

There is a newer version: 0.2.6
Show newest version
/*
 * Copyright 2016 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio

import java.nio.ByteBuffer

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.cloud.dataflow.sdk.PipelineResult.State
import com.google.cloud.dataflow.sdk.options.{ApplicationNameOptions, DataflowPipelineOptions}
import com.google.cloud.dataflow.sdk.runners.{AggregatorPipelineExtractor, AggregatorValues}
import com.google.cloud.dataflow.sdk.transforms.Aggregator
import com.google.cloud.dataflow.sdk.util.{IOChannelUtils, MimeTypes}
import com.google.cloud.dataflow.sdk.{Pipeline, PipelineResult}
import com.spotify.scio.values.Accumulator

import scala.collection.JavaConverters._
import scala.concurrent.Future

/**
 * Represent a Scio pipeline result.
 *
 * @param internal     the underlying Dataflow `PipelineResult`; state and aggregator values are
 *                     read from it
 * @param finalState   a future holding a pipeline `State` (presumably the terminal state of the
 *                     job; this class only stores it)
 * @param accumulators accumulators whose values are exported by [[saveMetrics]]
 * @param pipeline     pipeline used to discover aggregator steps and to read pipeline options
 */
class ScioResult private[scio] (val internal: PipelineResult,
                                val finalState: Future[State],
                                val accumulators: Seq[Accumulator[_]],
                                private val pipeline: Pipeline) {

  // Aggregators found in the pipeline's steps, grouped by aggregator name. One name may map to
  // several aggregators; their values are combined in `accumulatorTotalValue`.
  private val aggregators: Map[String, Iterable[Aggregator[_, _]]] =
    new AggregatorPipelineExtractor(pipeline)
      .getAggregatorSteps
      .asScala
      .keys
      .groupBy(_.getName)

  /** Whether the pipeline is completed. */
  def isCompleted: Boolean = internal.getState.isTerminal

  /** Pipeline's current state. */
  def state: State = internal.getState

  /**
   * Get the total value of an accumulator.
   *
   * The per-aggregator totals are combined with the accumulator's own `combineFn`. When no
   * aggregator with the accumulator's name exists in the pipeline, the result is whatever
   * `combineFn` yields for an empty input.
   */
  def accumulatorTotalValue[T](acc: Accumulator[T]): T = {
    val perAggregatorTotals = getAggregatorValues(acc).map(_.getTotalValue(acc.combineFn))
    acc.combineFn(perAggregatorTotals.asJava)
  }

  /**
   * Get the values of an accumulator at each step it was used.
   *
   * @return a map keyed by the step identifiers reported by the runner; empty when no aggregator
   *         with the accumulator's name exists in the pipeline
   */
  def accumulatorValuesAtSteps[T](acc: Accumulator[T]): Map[String, T] =
    getAggregatorValues(acc).flatMap(_.getValuesAtSteps.asScala).toMap

  /**
   * Save metrics of the finished pipeline to a file.
   *
   * The metrics are serialized as JSON with Jackson and written to `filename` through
   * `IOChannelUtils`.
   *
   * @param filename destination passed to `IOChannelUtils.create`
   * @throws IllegalArgumentException if the pipeline has not reached a terminal state
   */
  def saveMetrics(filename: String): Unit = {
    require(isCompleted, "Pipeline has to be finished to save metrics.")

    val mapper = new ObjectMapper()
    mapper.registerModule(DefaultScalaModule)

    val totalValues = accumulators
      .map(acc => AccumulatorValue(acc.name, accumulatorTotalValue(acc)))

    val stepsValues = accumulators
      .map(acc => AccumulatorStepsValue(acc.name,
        accumulatorValuesAtSteps(acc).map { case (step, value) =>
          AccumulatorStepValue(step, value)
        }))

    val options = this.pipeline.getOptions
    // Note: `Metrics.jobName` receives the application name and `Metrics.jobId` receives the
    // Dataflow job name; kept as is so the output format does not change.
    val metrics = Metrics(scioVersion,
                          scalaVersion,
                          options.as(classOf[ApplicationNameOptions]).getAppName,
                          options.as(classOf[DataflowPipelineOptions]).getJobName,
                          AccumulatorMetrics(totalValues, stepsValues))

    // Serialize before opening the destination so that a failure while collecting or encoding
    // the metrics does not leave an empty file behind.
    val payload = ByteBuffer.wrap(mapper.writeValueAsBytes(metrics))

    val out = IOChannelUtils.create(filename, MimeTypes.TEXT)
    try {
      // A channel may accept only part of the buffer per call; keep writing until it is drained.
      while (payload.hasRemaining) {
        out.write(payload)
      }
    } finally {
      out.close()
    }
  }

  // Look up the runner-reported values of every aggregator sharing the accumulator's name.
  // An accumulator that has no aggregator step in the pipeline yields an empty result instead
  // of failing the lookup, so that `saveMetrics` still works when some accumulators are unused.
  private def getAggregatorValues[T](acc: Accumulator[T]): Iterable[AggregatorValues[T]] =
    aggregators
      .getOrElse(acc.name, Nil)
      .map(a => internal.getAggregatorValues(a.asInstanceOf[Aggregator[_, T]]))

}

// The case classes below form the document that `ScioResult.saveMetrics` serializes with
// Jackson's Scala module. Presumably the constructor parameter names become the JSON field
// names, so renaming them would change the output format -- confirm before refactoring.

/**
 * Top-level metrics record written by `ScioResult.saveMetrics`.
 *
 * NOTE(review): `saveMetrics` fills `jobName` from `ApplicationNameOptions.getAppName` and
 * `jobId` from `DataflowPipelineOptions.getJobName`, so the field names do not match their
 * sources exactly -- confirm whether this is intended.
 *
 * @param version      Scio version (`scioVersion` at the call site)
 * @param scalaVersion Scala version (`scalaVersion` at the call site)
 * @param jobName      see the review note above
 * @param jobId        see the review note above
 * @param accumulators accumulator totals and per-step values
 */
private[scio] case class Metrics(version: String,
                                 scalaVersion: String,
                                 jobName: String,
                                 jobId: String,
                                 accumulators: AccumulatorMetrics)
/**
 * Accumulator section of [[Metrics]].
 *
 * @param total one total value per accumulator
 * @param steps per-step values, one entry per accumulator
 */
private[scio] case class AccumulatorMetrics(total: Iterable[AccumulatorValue],
                                            steps: Iterable[AccumulatorStepsValue])
/** Total value of the accumulator called `name`. */
private[scio] case class AccumulatorValue(name: String, value: Any)
/** Value of an accumulator at a single step; `name` is the step key reported by the runner. */
private[scio] case class AccumulatorStepValue(name: String, value: Any)
/** All per-step values of the accumulator called `name`. */
private[scio] case class AccumulatorStepsValue(name: String, steps: Iterable[AccumulatorStepValue])




© 2015 - 2025 Weber Informatics LLC | Privacy Policy