All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.spotify.scio.testing.PipelineTestUtils.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2019 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.testing

import org.apache.beam.sdk.options._
import com.spotify.scio._
import com.spotify.scio.coders.Coder
import com.spotify.scio.io.{ClosedTap, Tap}
import com.spotify.scio.values.SCollection
import org.apache.beam.sdk.runners.PTransformOverride

import scala.jdk.CollectionConverters._

/** Mix-in offering helper methods for unit testing pipelines. */
trait PipelineTestUtils {

  /**
   * Exercise pipeline components against a test [[ScioContext]].
   *
   * A context is obtained from `ScioContext.forTest()`, handed to `fn`, and then run.
   *
   * {{{
   * runWithContext { sc =>
   *   sc.parallelize(Seq(1, 2, 3)).sum should containSingleValue (6)
   * }
   * }}}
   *
   * @param fn
   *   code that tests the components and verifies the result
   * @return
   *   the [[ScioExecutionContext]] produced by running the context
   */
  def runWithContext(fn: ScioContext => Any): ScioExecutionContext = {
    val testContext = ScioContext.forTest()
    fn(testContext)
    testContext.run()
  }

  /**
   * Exercise pipeline components against a [[ScioContext]] built from the given options.
   *
   * Unlike [[runWithContext]], the context is created with `ScioContext(options)` rather than
   * `ScioContext.forTest()`.
   *
   * @param options
   *   pipeline options used to create the context; defaults to `PipelineOptionsFactory.create()`
   * @param fn
   *   code that tests the components and verifies the result
   * @return
   *   the [[ScioExecutionContext]] produced by running the context
   */
  def runWithRealContext(
    options: PipelineOptions = PipelineOptionsFactory.create()
  )(fn: ScioContext => Any): ScioExecutionContext = {
    val context = ScioContext(options)
    fn(context)
    context.run()
  }

  /**
   * Exercise a transform on in-memory data.
   *
   * `data` is parallelized into an [[com.spotify.scio.values.SCollection SCollection]] and passed
   * to `fn`; the [[com.spotify.scio.values.SCollection SCollection]] returned by `fn` is
   * materialized and handed back for verification.
   *
   * {{{
   * runWithData(Seq(1, 2, 3)) { p =>
   *   p.sum
   * } shouldBe Seq(6)
   * }}}
   *
   * @param data
   *   input data
   * @param fn
   *   transform to be tested
   * @return
   *   output data
   */
  def runWithData[T: Coder, U](
    data: Iterable[T]
  )(fn: SCollection[T] => SCollection[U]): Seq[U] = {
    val pipeline: ScioContext => SCollection[U] = sc => fn(sc.parallelize(data))
    // Only the materialized output is of interest here; the ScioResult is discarded.
    runWithLocalOutput(pipeline)._2
  }

  /**
   * Exercise a two-input transform on in-memory data.
   *
   * Each input is parallelized into an [[com.spotify.scio.values.SCollection SCollection]] and
   * passed to `fn`; the [[com.spotify.scio.values.SCollection SCollection]] returned by `fn` is
   * materialized and handed back for verification.
   *
   * @param data1
   *   first input data
   * @param data2
   *   second input data
   * @param fn
   *   transform to be tested
   * @return
   *   output data
   */
  def runWithData[T1: Coder, T2: Coder, U](data1: Iterable[T1], data2: Iterable[T2])(
    fn: (SCollection[T1], SCollection[T2]) => SCollection[U]
  ): Seq[U] = {
    val pipeline: ScioContext => SCollection[U] = { sc =>
      val in1 = sc.parallelize(data1)
      val in2 = sc.parallelize(data2)
      fn(in1, in2)
    }
    runWithLocalOutput(pipeline)._2
  }

  /**
   * Exercise a three-input transform on in-memory data.
   *
   * Each input is parallelized into an [[com.spotify.scio.values.SCollection SCollection]] and
   * passed to `fn`; the [[com.spotify.scio.values.SCollection SCollection]] returned by `fn` is
   * materialized and handed back for verification.
   *
   * @param data1
   *   first input data
   * @param data2
   *   second input data
   * @param data3
   *   third input data
   * @param fn
   *   transform to be tested
   * @return
   *   output data
   */
  def runWithData[T1: Coder, T2: Coder, T3: Coder, U](
    data1: Iterable[T1],
    data2: Iterable[T2],
    data3: Iterable[T3]
  )(fn: (SCollection[T1], SCollection[T2], SCollection[T3]) => SCollection[U]): Seq[U] = {
    val pipeline: ScioContext => SCollection[U] = { sc =>
      val in1 = sc.parallelize(data1)
      val in2 = sc.parallelize(data2)
      val in3 = sc.parallelize(data3)
      fn(in1, in2, in3)
    }
    runWithLocalOutput(pipeline)._2
  }

  /**
   * Exercise a four-input transform on in-memory data.
   *
   * Each input is parallelized into an [[com.spotify.scio.values.SCollection SCollection]] and
   * passed to `fn`; the [[com.spotify.scio.values.SCollection SCollection]] returned by `fn` is
   * materialized and handed back for verification.
   *
   * @param data1
   *   first input data
   * @param data2
   *   second input data
   * @param data3
   *   third input data
   * @param data4
   *   fourth input data
   * @param fn
   *   transform to be tested
   * @return
   *   output data
   */
  def runWithData[T1: Coder, T2: Coder, T3: Coder, T4: Coder, U](
    data1: Iterable[T1],
    data2: Iterable[T2],
    data3: Iterable[T3],
    data4: Iterable[T4]
  )(
    fn: (SCollection[T1], SCollection[T2], SCollection[T3], SCollection[T4]) => SCollection[U]
  ): Seq[U] = {
    val pipeline: ScioContext => SCollection[U] = { sc =>
      val in1 = sc.parallelize(data1)
      val in2 = sc.parallelize(data2)
      val in3 = sc.parallelize(data3)
      val in4 = sc.parallelize(data4)
      fn(in1, in2, in3, in4)
    }
    runWithLocalOutput(pipeline)._2
  }

  /**
   * Exercise pipeline components against a test [[ScioContext]], replacing transforms with the
   * supplied overrides.
   *
   * The overrides are applied to the context's pipeline after `fn` has built it and before the
   * context is run.
   *
   * {{{
   * runWithOverrides(TransformOverride.of("operation", (v: Int) => v.toString)) { sc =>
   *   val result = sc.parallelize(Seq(1, 2, 3))
   *     .withName("operation")
   *     .map(operationFn)
   *   result should contain theSameElementAs("1", "2", "3")
   * }
   * }}}
   *
   * @param overrides
   *   transform overrides passed to the pipeline's `replaceAll`
   * @param fn
   *   code that tests the components and verifies the result
   * @return
   *   the [[ScioExecutionContext]] produced by running the context
   */
  def runWithOverrides(
    overrides: PTransformOverride*
  )(fn: ScioContext => Any): ScioExecutionContext = {
    val testContext = ScioContext.forTest()
    fn(testContext)
    // Overrides are installed only once fn has finished building the pipeline.
    val builtPipeline = testContext.pipeline
    builtPipeline.replaceAll(overrides.toList.asJava)
    testContext.run()
  }

  /**
   * Exercise pipeline components with a [[ScioContext]] and expose the output's tap.
   *
   * The context is created with `ScioContext()` and the call waits for the run to finish before
   * returning.
   *
   * @param fn
   *   transform to be tested
   * @return
   *   a tuple containing the [[ScioResult]] and the [[Tap]] to read back data written by the tested
   *   pipeline
   */
  def runWithOutput[T](fn: ScioContext => ClosedTap[T]): (ScioResult, Tap[T]) = {
    val context = ScioContext()
    val closedTap = fn(context)
    // Wait for completion so that a non-test runner is blocked until results are available.
    val scioResult = context.run().waitUntilFinish()
    val openedTap = scioResult.tap(closedTap)
    (scioResult, openedTap)
  }

  /**
   * Exercise pipeline components with a [[ScioContext]] and materialize the resulting collection.
   *
   * The [[com.spotify.scio.values.SCollection SCollection]] returned by `fn` is materialized and
   * its contents are read back for verification.
   *
   * @param fn
   *   transform to be tested
   * @return
   *   a tuple containing the [[ScioResult]] and the materialized result of fn as a
   *   [[scala.collection.Seq Seq]]
   */
  def runWithLocalOutput[U](fn: ScioContext => SCollection[U]): (ScioResult, Seq[U]) = {
    val (scioResult, outputTap) = runWithOutput { sc =>
      fn(sc).materialize
    }
    val values = outputTap.value.toSeq
    (scioResult, values)
  }

  /**
   * Exercise pipeline components with a [[ScioContext]] and materialize both resulting
   * collections.
   *
   * Both collections are materialized before the context is run, and the call waits for the run
   * to finish before reading them back.
   *
   * @param fn
   *   transform to be tested
   * @return
   *   a tuple containing the [[ScioResult]] and the materialized results of the SCollections
   *   returned by fn as [[scala.collection.Seq Seq]]s
   */
  def runWithLocalOutput[T, U](
    fn: ScioContext => (SCollection[T], SCollection[U])
  ): (ScioResult, Seq[T], Seq[U]) = {
    val context = ScioContext()
    val (first, second) = fn(context)
    val firstTap = first.materialize
    val secondTap = second.materialize
    // Wait for completion so that a non-test runner is blocked until results are available.
    val scioResult = context.run().waitUntilFinish()
    val firstValues = scioResult.tap(firstTap).value.toSeq
    val secondValues = scioResult.tap(secondTap).value.toSeq
    (scioResult, firstValues, secondValues)
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy