All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This can take a few minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you like.
com.spotify.scio.bigtable.BigTableIO.scala Maven / Gradle / Ivy
/*
* Copyright 2019 Spotify AB.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.spotify.scio.bigtable
import com.google.bigtable.v2._
import com.google.cloud.bigtable.config.BigtableOptions
import com.google.protobuf.ByteString
import com.spotify.scio.ScioContext
import com.spotify.scio.coders.{Coder, CoderMaterializer}
import com.spotify.scio.io.{EmptyTap, EmptyTapOf, ScioIO, Tap, TapT, TestIO}
import com.spotify.scio.util.Functions
import com.spotify.scio.values.SCollection
import org.apache.beam.sdk.io.gcp.{bigtable => beam}
import org.apache.beam.sdk.io.range.ByteKeyRange
import org.apache.beam.sdk.values.KV
import org.joda.time.Duration
import org.typelevel.scalaccompat.annotation.nowarn
import scala.jdk.CollectionConverters._
import scala.util.chaining._
/**
 * Common parent of the Bigtable [[ScioIO]] definitions in this file.
 *
 * The trait is sealed, so its direct subtypes must be declared in this source file. The tap
 * type is pinned to `EmptyTapOf[T]`, whose tap value type is `Nothing`.
 *
 * @tparam T element type handled by the IO
 */
sealed trait BigtableIO[T] extends ScioIO[T] {
  override final val tapT: TapT.Aux[T, Nothing] = EmptyTapOf[T]
}
object BigtableIO {

  /**
   * Creates an IO described only by its project, instance and table identifiers.
   *
   * The returned instance mixes in [[TestIO]] and reports a `testId` of the form
   * `BigtableIO(<projectId>TAB<instanceId>TAB<tableId>)`.
   *
   * @param projectId  project identifier, first component of the test id
   * @param instanceId instance identifier, second component of the test id
   * @param tableId    table identifier, third component of the test id
   */
  final def apply[T](projectId: String, instanceId: String, tableId: String): BigtableIO[T] = {
    // The identifier depends only on the three arguments, so build it once up front.
    val id = s"BigtableIO($projectId\t$instanceId\t$tableId)"
    new BigtableIO[T] with TestIO[T] {
      override def testId: String = id
    }
  }
}
/**
 * Read-only Bigtable IO producing the [[Row]]s of a single table.
 *
 * @param bigtableOptions supplies the project id and instance id, and seeds the options builder
 *                        passed to Beam's `BigtableIO.read()`
 * @param tableId         table to read from
 */
final case class BigtableRead(bigtableOptions: BigtableOptions, tableId: String)
    extends BigtableIO[Row] {
  override type ReadP = BigtableRead.ReadParam
  override type WriteP = Nothing

  /** Identifier of the form `BigtableIO(<projectId>TAB<instanceId>TAB<tableId>)`. */
  override def testId: String = {
    val project = bigtableOptions.getProjectId
    val instance = bigtableOptions.getInstanceId
    s"BigtableIO($project\t$instance\t$tableId)"
  }

  /**
   * Builds a Beam `BigtableIO.read()` transform from the options and `params`, applies it to
   * `sc` and sets a protobuf [[Row]] coder on the resulting collection.
   *
   * Key ranges are only passed on when `params.keyRanges` is non-empty, and the row filter only
   * when `params.rowFilter` is non-null. An absent `maxBufferElementCount` is forwarded as
   * `null`.
   */
  override protected def read(sc: ScioContext, params: ReadP): SCollection[Row] = {
    val rowCoder = CoderMaterializer.beam(sc, Coder.protoMessageCoder[Row])
    // Local alias so the serializable function below closes over the options value
    // instead of a field access on this instance.
    val capturedOptions = bigtableOptions

    @nowarn("cat=deprecation")
    val transform = {
      val base = beam.BigtableIO
        .read()
        .withProjectId(capturedOptions.getProjectId)
        .withInstanceId(capturedOptions.getInstanceId)
        .withTableId(tableId)
        .withBigtableOptionsConfigurator(Functions.serializableFn(_ => capturedOptions.toBuilder))
        .withMaxBufferElementCount(params.maxBufferElementCount.map(Int.box).orNull)

      val ranged =
        if (params.keyRanges.nonEmpty) base.withKeyRanges(params.keyRanges.asJava)
        else base

      Option(params.rowFilter)
        .map(filter => ranged.withRowFilter(filter))
        .getOrElse(ranged)
    }

    sc.applyTransform(transform).setCoder(rowCoder)
  }

  /** Always throws `UnsupportedOperationException`: this IO does not support writing. */
  override protected def write(data: SCollection[Row], params: WriteP): Tap[Nothing] = {
    val reason = "BigtableRead is read-only, use Mutation to write to Bigtable"
    throw new UnsupportedOperationException(reason)
  }

  /** Always throws `NotImplementedError`: no tap is provided for Bigtable reads. */
  override def tap(params: ReadP): Tap[Nothing] = {
    val reason = "Bigtable tap not implemented"
    throw new NotImplementedError(reason)
  }
}
object BigtableRead {
  object ReadParam {

    /** Default key ranges: empty, in which case the read does not set any key range. */
    val DefaultKeyRanges: Seq[ByteKeyRange] = Seq.empty[ByteKeyRange]

    /** Default row filter: `null`, in which case the read does not set any row filter. */
    val DefaultRowFilter: RowFilter = null

    /** Default buffer element count: not specified. */
    val DefaultMaxBufferElementCount: Option[Int] = None

    /** Read parameters restricted to a single key range, with all other defaults. */
    def apply(keyRange: ByteKeyRange): ReadParam =
      new ReadParam(Seq(keyRange))

    /** Read parameters restricted to a single key range and using the given row filter. */
    def apply(keyRange: ByteKeyRange, rowFilter: RowFilter): ReadParam =
      new ReadParam(Seq(keyRange), rowFilter)
  }

  /**
   * Parameters accepted by [[BigtableRead]].
   *
   * @param keyRanges             key ranges to read; see [[ReadParam.DefaultKeyRanges]]
   * @param rowFilter             filter applied to rows; see [[ReadParam.DefaultRowFilter]]
   * @param maxBufferElementCount optional buffer element count forwarded to the Beam read
   */
  final case class ReadParam private (
    keyRanges: Seq[ByteKeyRange] = ReadParam.DefaultKeyRanges,
    rowFilter: RowFilter = ReadParam.DefaultRowFilter,
    maxBufferElementCount: Option[Int] = ReadParam.DefaultMaxBufferElementCount
  )

  /**
   * Creates a [[BigtableRead]] from plain identifiers by building a `BigtableOptions` that
   * carries only the project id and instance id.
   */
  final def apply(projectId: String, instanceId: String, tableId: String): BigtableRead = {
    val bigtableOptions = BigtableOptions
      .builder()
      .setProjectId(projectId)
      .setInstanceId(instanceId)
      .build
    BigtableRead(bigtableOptions, tableId)
  }
}
/**
 * Write-only Bigtable IO taking `(row key, mutations)` pairs for a single table.
 *
 * @param bigtableOptions supplies the project id and instance id, and seeds the options builder
 *                        passed to Beam's `BigtableIO.write()`; also handed to the bulk writer
 * @param tableId         table to write to
 * @tparam T mutation type of the written values
 */
final case class BigtableWrite[T <: Mutation](bigtableOptions: BigtableOptions, tableId: String)
    extends BigtableIO[(ByteString, Iterable[T])] {
  override type ReadP = Nothing
  override type WriteP = BigtableWrite.WriteParam

  /** Identifier of the form `BigtableIO(<projectId>TAB<instanceId>TAB<tableId>)`. */
  override def testId: String = {
    val project = bigtableOptions.getProjectId
    val instance = bigtableOptions.getInstanceId
    s"BigtableIO($project\t$instance\t$tableId)"
  }

  /** Always throws `UnsupportedOperationException`: this IO does not support reading. */
  override protected def read(
    sc: ScioContext,
    params: ReadP
  ): SCollection[(ByteString, Iterable[T])] = {
    val reason = "BigtableWrite is write-only, use Row to read from Bigtable"
    throw new UnsupportedOperationException(reason)
  }

  /**
   * Converts each `(key, mutations)` pair to a Beam `KV` and applies a sink chosen by `params`:
   * Beam's `BigtableIO.write()` for [[BigtableWrite.Default]], or a `BigtableBulkWriter` for
   * [[BigtableWrite.Bulk]]. Returns `EmptyTap`.
   */
  override protected def write(
    data: SCollection[(ByteString, Iterable[T])],
    params: WriteP
  ): Tap[Nothing] = {
    val sink = params match {
      case BigtableWrite.Default =>
        // Local alias so the serializable function below closes over the options value
        // instead of a field access on this instance.
        val capturedOptions = bigtableOptions
        beam.BigtableIO
          .write()
          .withProjectId(capturedOptions.getProjectId)
          .withInstanceId(capturedOptions.getInstanceId)
          .withTableId(tableId)
          .withBigtableOptionsConfigurator(
            Functions.serializableFn(_ => capturedOptions.toBuilder)
          ): @nowarn("cat=deprecation")
      case bulk: BigtableWrite.Bulk =>
        new BigtableBulkWriter(tableId, bigtableOptions, bulk.numOfShards, bulk.flushInterval)
    }

    data.transform_("Bigtable write") { pairs =>
      pairs
        .map { pair =>
          // The sink is fed java.lang.Iterable[Mutation]; T <: Mutation, hence the cast.
          val mutations = pair._2.asJava.asInstanceOf[java.lang.Iterable[Mutation]]
          KV.of(pair._1, mutations)
        }
        .applyInternal(sink)
    }

    EmptyTap
  }

  /** Returns `EmptyTap`. */
  override def tap(params: ReadP): Tap[Nothing] = EmptyTap
}
object BigtableWrite {

  /** Selects which sink [[BigtableWrite]] uses when writing. */
  sealed trait WriteParam

  /** Write through Beam's `BigtableIO.write()`. */
  object Default extends WriteParam

  object Bulk {

    /** Flush interval used when none is given: one second. */
    private[bigtable] val DefaultFlushInterval: Duration = Duration.standardSeconds(1)
  }

  /**
   * Write through `BigtableBulkWriter`.
   *
   * @param numOfShards   shard count handed to the bulk writer
   * @param flushInterval flush interval handed to the bulk writer
   */
  final case class Bulk private (
    numOfShards: Int,
    flushInterval: Duration = Bulk.DefaultFlushInterval
  ) extends WriteParam

  /**
   * Creates a [[BigtableWrite]] from plain identifiers by building a `BigtableOptions` that
   * carries only the project id and instance id.
   */
  final def apply[T <: Mutation](
    projectId: String,
    instanceId: String,
    tableId: String
  ): BigtableWrite[T] = {
    val options =
      BigtableOptions.builder().setProjectId(projectId).setInstanceId(instanceId).build()
    new BigtableWrite[T](options, tableId)
  }
}