
// com.spotify.scio.ScioResult.scala (from a Maven / Gradle / Ivy artifact listing)
// Artifact: scio-core_2.11
// Scio - A Scala API for Apache Beam and Google Cloud Dataflow
/*
* Copyright 2016 Spotify AB.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.spotify.scio
import java.nio.ByteBuffer
import com.google.cloud.dataflow.sdk.PipelineResult.State
import com.google.cloud.dataflow.sdk.options.{ApplicationNameOptions, DataflowPipelineOptions}
import com.google.cloud.dataflow.sdk.runners.{AggregatorPipelineExtractor, AggregatorValues}
import com.google.cloud.dataflow.sdk.transforms.Aggregator
import com.google.cloud.dataflow.sdk.util.{IOChannelUtils, MimeTypes}
import com.google.cloud.dataflow.sdk.{Pipeline, PipelineResult}
import com.spotify.scio.util.ScioUtil
import com.spotify.scio.values.Accumulator
import scala.collection.JavaConverters._
import scala.concurrent.Future
/**
 * Represents the result of a Scio pipeline run.
 *
 * @param internal     underlying SDK `PipelineResult`; state and aggregator value queries are
 *                     delegated to it
 * @param finalState   future of the pipeline's `State` (presumably its terminal state; confirm
 *                     against the code that constructs this result)
 * @param accumulators accumulators whose values may be queried through this result
 * @param pipeline     pipeline whose aggregators and options are inspected
 */
class ScioResult private[scio] (val internal: PipelineResult,
                                val finalState: Future[State],
                                val accumulators: Seq[Accumulator[_]],
                                private val pipeline: Pipeline) {

  // Aggregators found in the pipeline, grouped by name. One name may map to several
  // aggregators, which is why the accumulator queries below combine over an Iterable.
  private val aggregators: Map[String, Iterable[Aggregator[_, _]]] =
    new AggregatorPipelineExtractor(pipeline)
      .getAggregatorSteps
      .asScala
      .keys
      .groupBy(_.getName)

  /** Whether the pipeline is completed, i.e. its current state is terminal. */
  def isCompleted: Boolean = internal.getState.isTerminal

  /** Pipeline's current state. */
  def state: State = internal.getState

  /**
   * Get the total value of an accumulator.
   *
   * The per-aggregator totals of every aggregator sharing the accumulator's name are combined
   * with the accumulator's own combine function.
   *
   * @throws IllegalArgumentException if `acc` is not one of this result's `accumulators`
   */
  def accumulatorTotalValue[T](acc: Accumulator[T]): T = {
    require(accumulators.contains(acc), "Accumulator not present in the result")
    acc.combineFn(getAggregatorValues(acc).map(_.getTotalValue(acc.combineFn)).asJava)
  }

  /**
   * Get the values of an accumulator at each step it was used.
   *
   * @return map from step name to the accumulator value at that step; if several aggregators
   *         report the same step name, the last one encountered wins
   * @throws IllegalArgumentException if `acc` is not one of this result's `accumulators`
   */
  def accumulatorValuesAtSteps[T](acc: Accumulator[T]): Map[String, T] = {
    require(accumulators.contains(acc), "Accumulator not present in the result")
    getAggregatorValues(acc).flatMap(_.getValuesAtSteps.asScala).toMap
  }

  /**
   * Save metrics of the finished pipeline to a file.
   *
   * The metrics are serialized with the mapper returned by `ScioUtil.getScalaJsonMapper` and
   * written to the channel opened for `filename`.
   *
   * @param filename destination passed to `IOChannelUtils.create`
   * @throws IllegalArgumentException if the pipeline is not completed yet
   */
  def saveMetrics(filename: String): Unit = {
    require(isCompleted, "Pipeline has to be finished to save metrics.")

    // Collects version info, job info, state and all accumulator values into one record.
    def getMetrics: MetricSchema.Metrics = {
      import MetricSchema._
      val totalValues = accumulators
        .map(acc => AccumulatorValue(acc.name, accumulatorTotalValue(acc)))
      val stepsValues = accumulators.map(acc => AccumulatorStepsValue(acc.name,
        accumulatorValuesAtSteps(acc).map(a => AccumulatorStepValue(a._1, a._2))))
      val options = this.pipeline.getOptions
      // NOTE(review): the application name fills `jobName` and the Dataflow job name fills
      // `jobId` in MetricSchema.Metrics; confirm this mapping is intended.
      Metrics(scioVersion,
              scalaVersion,
              options.as(classOf[ApplicationNameOptions]).getAppName,
              options.as(classOf[DataflowPipelineOptions]).getJobName,
              this.state.toString,
              AccumulatorMetrics(totalValues, stepsValues))
    }

    // Serialize before opening the channel so that a failure while collecting or encoding
    // the metrics happens before the destination is touched.
    val mapper = ScioUtil.getScalaJsonMapper
    val payload = ByteBuffer.wrap(mapper.writeValueAsBytes(getMetrics))

    val out = IOChannelUtils.create(filename, MimeTypes.TEXT)
    try {
      // A channel may accept only part of the buffer per call; keep writing until it is
      // drained, otherwise the file could be silently truncated.
      while (payload.hasRemaining) {
        out.write(payload)
      }
    } finally {
      if (out != null) {
        out.close()
      }
    }
  }

  // Looks up every aggregator registered under the accumulator's name and fetches its values
  // from the underlying result. The cast is unchecked: aggregators are stored with wildcard
  // types, so the output type is assumed to match the accumulator's `T`.
  private def getAggregatorValues[T](acc: Accumulator[T]): Iterable[AggregatorValues[T]] =
    aggregators.getOrElse(acc.name, Nil)
      .map(a => internal.getAggregatorValues(a.asInstanceOf[Aggregator[_, T]]))
}
/**
 * Record types describing the metrics of a pipeline result.
 *
 * All case classes are `final` so they cannot be extended; field names, order and types
 * define the shape of the record and are left as they were.
 */
private[scio] object MetricSchema {

  /**
   * Top-level metrics record.
   *
   * @param version      Scio version
   * @param scalaVersion Scala version
   * @param jobName      name of the job
   * @param jobId        identifier of the job
   * @param state        pipeline state rendered as a string
   * @param accumulators accumulator totals and per-step values
   */
  final case class Metrics(version: String,
                           scalaVersion: String,
                           jobName: String,
                           jobId: String,
                           state: String,
                           accumulators: AccumulatorMetrics)

  /**
   * Accumulator values of a pipeline.
   *
   * @param total one total value per accumulator
   * @param steps per-step values, one entry per accumulator
   */
  final case class AccumulatorMetrics(total: Iterable[AccumulatorValue],
                                      steps: Iterable[AccumulatorStepsValue])

  /** Total value of the accumulator called `name`. Untyped because accumulators differ in type. */
  final case class AccumulatorValue(name: String, value: Any)

  /** Value of an accumulator at the step called `name`. */
  final case class AccumulatorStepValue(name: String, value: Any)

  /** Per-step values of the accumulator called `name`. */
  final case class AccumulatorStepsValue(name: String, steps: Iterable[AccumulatorStepValue])
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy