com.spotify.scio.coders.ScalaCoders.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of scio-coders_2.11 Show documentation
Show all versions of scio-coders_2.11 Show documentation
Scio add-on for static Coder derivation
The newest version!
/*
* Copyright 2016 Spotify AB.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.spotify.scio.coders
import java.io.{InputStream, OutputStream}
import java.util.Collections
import org.apache.beam.sdk.coders.Coder.NonDeterministicException
import org.apache.beam.sdk.coders.{Coder => BCoder, _}
import org.apache.beam.sdk.util.common.ElementByteSizeObserver
import scala.reflect.ClassTag
import scala.collection.{BitSet, SortedSet, TraversableOnce, mutable => m}
import scala.collection.convert.Wrappers
import scala.language.higherKinds
/** Coder for `Unit`: writes no bytes when encoding and reads none when decoding. */
private object UnitCoder extends AtomicCoder[Unit] {
  override def encode(value: Unit, os: OutputStream): Unit = {}

  override def decode(is: InputStream): Unit = {}
}
/**
 * Coder for `Nothing`. No value of that type exists, so `encode` writes nothing and
 * `decode` is left unimplemented (`???`).
 */
private object NothingCoder extends AtomicCoder[Nothing] {
  override def encode(value: Nothing, os: OutputStream): Unit = {}

  // can't possibly happen: there is no encoded `Nothing` to read back
  override def decode(is: InputStream): Nothing = ???
}
/**
 * Shared base for coders of single-parameter container types `M[T]`.
 *
 * Determinism, equality consistency and structural values are all derived from the
 * element coder; `toSeq` is the view used to iterate over a container value.
 *
 * @param elemCoder Beam coder for the container's elements
 * @param toSeq     view of `M[T]` as a `TraversableOnce[T]`
 */
private abstract class BaseSeqLikeCoder[M[_], T](val elemCoder: BCoder[T])(
  implicit toSeq: M[T] => TraversableOnce[T])
    extends AtomicCoder[M[T]] {

  override def getCoderArguments: java.util.List[_ <: BCoder[_]] =
    Collections.singletonList(elemCoder)

  // Determinism and equality checks are forwarded to the element coder.
  override def verifyDeterministic(): Unit = {
    elemCoder.verifyDeterministic()
  }

  override def consistentWithEquals(): Boolean = {
    elemCoder.consistentWithEquals()
  }

  /** Structural value of a container: the structural values of its elements, in iteration order. */
  override def structuralValue(value: M[T]): AnyRef = {
    val collected = toSeq(value).foldLeft(Seq.newBuilder[AnyRef]) { (acc, elem) =>
      acc += elemCoder.structuralValue(elem)
    }
    collected.result()
  }

  // Byte size estimation.
  override def isRegisterByteSizeObserverCheap(value: M[T]): Boolean = false

  override def registerByteSizeObserver(value: M[T], observer: ElementByteSizeObserver): Unit =
    (value: Any) match {
      case wrapped: Wrappers.JIterableWrapper[_] =>
        // A wrapped Java iterable is handed to Beam's own IterableCoder (the term
        // `IterableCoder` resolves to the Beam class; the class of the same name in this
        // file has no companion object).
        val javaIterable = wrapped.underlying.asInstanceOf[java.lang.Iterable[T]]
        IterableCoder.of(elemCoder).registerByteSizeObserver(javaIterable, observer)
      case _ =>
        super.registerByteSizeObserver(value, observer)
    }
}
/**
 * Length-prefixed coder for container types `M[T]`.
 *
 * The wire format is the element count (encoded with `VarIntCoder`) followed by each
 * element encoded with `bc`. Subclasses supply `decode(InputStream)` by calling the
 * builder-based `decode` helper with a builder for their concrete container type.
 *
 * @param bc    Beam coder for the elements
 * @param toSeq view of `M[T]` as a `TraversableOnce[T]`
 */
private abstract class SeqLikeCoder[M[_], T](bc: BCoder[T])(
  implicit toSeq: M[T] => TraversableOnce[T])
    extends BaseSeqLikeCoder[M, T](bc) {
  private[this] val lc = VarIntCoder.of()

  /**
   * Writes the element count followed by every element.
   *
   * A value that can only be traversed once (e.g. an `Iterator`) is materialised first:
   * computing `size` would otherwise exhaust it, and the count would be written without
   * any of the elements that follow it.
   */
  override def encode(value: M[T], outStream: OutputStream): Unit = {
    val view = toSeq(value)
    val elems: TraversableOnce[T] = if (view.isTraversableAgain) view else view.toVector
    lc.encode(elems.size, outStream)
    elems.foreach(bc.encode(_, outStream))
  }

  /**
   * Reads the element count, then that many elements, appending each to `builder`.
   *
   * @param inStream stream positioned at the start of an encoded container
   * @param builder  builder for the concrete container type to produce
   * @return the container built from the decoded elements
   */
  def decode(inStream: InputStream, builder: scala.collection.mutable.Builder[T, M[T]]): M[T] = {
    val size = lc.decode(inStream)
    (1 to size).foreach(_ => builder += bc.decode(inStream))
    builder.result()
  }
}
/**
 * Coder for `Option[T]`: a boolean presence flag, followed by the encoded element when
 * the option is defined.
 */
private class OptionCoder[T](bc: BCoder[T]) extends SeqLikeCoder[Option, T](bc) {
  private[this] val bcoder = BooleanCoder.of().asInstanceOf[BCoder[Boolean]]

  override def encode(value: Option[T], os: OutputStream): Unit =
    value match {
      case Some(elem) =>
        bcoder.encode(true, os)
        bc.encode(elem, os)
      case None =>
        bcoder.encode(false, os)
    }

  override def decode(is: InputStream): Option[T] =
    if (bcoder.decode(is)) {
      Some(bc.decode(is))
    } else {
      None
    }
}
/** Length-prefixed coder for `Seq[T]`; decoded values are built with `Seq.newBuilder`. */
private class SeqCoder[T](bc: BCoder[T]) extends SeqLikeCoder[Seq, T](bc) {
  override def decode(inStream: InputStream): Seq[T] = {
    val target = Seq.newBuilder[T]
    decode(inStream, target)
  }
}
/** Length-prefixed coder for `List[T]`; decoded values are built with `List.newBuilder`. */
private class ListCoder[T](bc: BCoder[T]) extends SeqLikeCoder[List, T](bc) {
  override def decode(inStream: InputStream): List[T] = {
    val target = List.newBuilder[T]
    decode(inStream, target)
  }
}
// TODO: implement chunking
/**
 * Length-prefixed coder for `TraversableOnce[T]`; decoded values are materialised
 * through `Seq.newBuilder`.
 */
private class TraversableOnceCoder[T](bc: BCoder[T]) extends SeqLikeCoder[TraversableOnce, T](bc) {
  override def decode(inStream: InputStream): TraversableOnce[T] = {
    val target = Seq.newBuilder[T]
    decode(inStream, target)
  }
}
// TODO: implement chunking
/** Length-prefixed coder for `Iterable[T]`; decoded values are built with `Iterable.newBuilder`. */
private class IterableCoder[T](bc: BCoder[T]) extends SeqLikeCoder[Iterable, T](bc) {
  override def decode(inStream: InputStream): Iterable[T] = {
    val target = Iterable.newBuilder[T]
    decode(inStream, target)
  }
}
/** Length-prefixed coder for `Vector[T]`; decoded values are built with `Vector.newBuilder`. */
private class VectorCoder[T](bc: BCoder[T]) extends SeqLikeCoder[Vector, T](bc) {
  override def decode(inStream: InputStream): Vector[T] = {
    val target = Vector.newBuilder[T]
    decode(inStream, target)
  }
}
/**
 * Length-prefixed coder for `Array[T]`; the `ClassTag` is needed to build the decoded
 * array through `Array.newBuilder`.
 */
private class ArrayCoder[T: ClassTag](bc: BCoder[T]) extends SeqLikeCoder[Array, T](bc) {
  override def decode(inStream: InputStream): Array[T] = {
    val target = Array.newBuilder[T]
    decode(inStream, target)
  }
}
/** Length-prefixed coder for mutable `ArrayBuffer[T]`. */
private class ArrayBufferCoder[T](bc: BCoder[T]) extends SeqLikeCoder[m.ArrayBuffer, T](bc) {
  override def decode(inStream: InputStream): m.ArrayBuffer[T] = {
    val target = m.ArrayBuffer.newBuilder[T]
    decode(inStream, target)
  }
}
/** Length-prefixed coder for mutable `Buffer[T]`; decoded values are built with `Buffer.newBuilder`. */
private class BufferCoder[T](bc: BCoder[T]) extends SeqLikeCoder[m.Buffer, T](bc) {
  override def decode(inStream: InputStream): m.Buffer[T] = {
    val target = m.Buffer.newBuilder[T]
    decode(inStream, target)
  }
}
/**
 * Length-prefixed coder for `Set[T]`; decoded values are built with `Set.newBuilder`.
 *
 * Elements are written in the set's iteration order. Two equal sets are not guaranteed
 * to iterate in the same order, so the encoding is reported as non-deterministic
 * regardless of the element coder.
 */
private class SetCoder[T](bc: BCoder[T]) extends SeqLikeCoder[Set, T](bc) {
  override def decode(inStream: InputStream): Set[T] = decode(inStream, Set.newBuilder[T])

  // Do not inherit the element-coder delegation: element order is part of the encoding.
  override def verifyDeterministic(): Unit =
    throw new NonDeterministicException(this,
                                        "Ordering of elements in a Set may be non-deterministic.")
}
/**
 * Length-prefixed coder for `SortedSet[T]`; the implicit `Ordering` is used to rebuild
 * the set through `SortedSet.newBuilder` when decoding.
 */
private class SortedSetCoder[T: Ordering](bc: BCoder[T]) extends SeqLikeCoder[SortedSet, T](bc) {
  override def decode(inStream: InputStream): SortedSet[T] = {
    val target = SortedSet.newBuilder[T]
    decode(inStream, target)
  }
}
/**
 * Coder for `BitSet`: the number of set bits followed by each set bit's index, all
 * encoded with `VarIntCoder`.
 */
private class BitSetCoder extends AtomicCoder[BitSet] {
  private[this] val lc = VarIntCoder.of()

  def decode(in: InputStream): BitSet = {
    val count: Int = lc.decode(in)
    val bits = BitSet.newBuilder
    var read = 0
    while (read < count) {
      bits += lc.decode(in)
      read += 1
    }
    bits.result()
  }

  def encode(ts: BitSet, out: OutputStream): Unit = {
    lc.encode(ts.size, out)
    for (bit <- ts) lc.encode(bit, out)
  }
}
/**
 * Coder for immutable `Map[K, V]`.
 *
 * The wire format is the entry count (encoded with `VarIntCoder`) followed by each
 * entry's key and value, in the map's iteration order.
 *
 * @param kc Beam coder for keys
 * @param vc Beam coder for values
 */
private class MapCoder[K, V](kc: BCoder[K], vc: BCoder[V]) extends AtomicCoder[Map[K, V]] {
  private[this] val lc = VarIntCoder.of()

  override def encode(value: Map[K, V], os: OutputStream): Unit = {
    lc.encode(value.size, os)
    value.foreach {
      case (k, v) =>
        kc.encode(k, os)
        vc.encode(v, os)
    }
  }

  override def decode(is: InputStream): Map[K, V] = {
    val l = lc.decode(is)
    val builder = Map.newBuilder[K, V]
    // foreach, not map: the loop runs only for its side effect on the builder, so no
    // intermediate collection should be allocated.
    (1 to l).foreach { _ =>
      val k = kc.decode(is)
      val v = vc.decode(is)
      builder += (k -> v)
    }
    builder.result()
  }

  // Determinism and equality: entries are written in iteration order, so the encoding
  // is always reported as non-deterministic and not consistent with equals.
  override def verifyDeterministic(): Unit =
    throw new NonDeterministicException(this,
                                        "Ordering of entries in a Map may be non-deterministic.")

  override def consistentWithEquals(): Boolean = false

  // Byte size estimation: the entry count plus every key and value.
  override def isRegisterByteSizeObserverCheap(value: Map[K, V]): Boolean = false

  override def registerByteSizeObserver(value: Map[K, V],
                                        observer: ElementByteSizeObserver): Unit = {
    lc.registerByteSizeObserver(value.size, observer)
    value.foreach {
      case (k, v) =>
        kc.registerByteSizeObserver(k, observer)
        vc.registerByteSizeObserver(v, observer)
    }
  }
}
/**
 * Coder for `scala.collection.mutable.Map[K, V]`.
 *
 * The wire format is the entry count (encoded with `VarIntCoder`) followed by each
 * entry's key and value, in the map's iteration order.
 *
 * @param kc Beam coder for keys
 * @param vc Beam coder for values
 */
private class MutableMapCoder[K, V](kc: BCoder[K], vc: BCoder[V]) extends AtomicCoder[m.Map[K, V]] {
  private[this] val lc = VarIntCoder.of()

  override def encode(value: m.Map[K, V], os: OutputStream): Unit = {
    lc.encode(value.size, os)
    value.foreach {
      case (k, v) =>
        kc.encode(k, os)
        vc.encode(v, os)
    }
  }

  override def decode(is: InputStream): m.Map[K, V] = {
    val l = lc.decode(is)
    val builder = m.Map.newBuilder[K, V]
    // foreach, not map: the loop runs only for its side effect on the builder, so no
    // intermediate collection should be allocated.
    (1 to l).foreach { _ =>
      val k = kc.decode(is)
      val v = vc.decode(is)
      builder += (k -> v)
    }
    builder.result()
  }

  // Determinism and equality: entries are written in iteration order, so the encoding
  // is always reported as non-deterministic and not consistent with equals.
  override def verifyDeterministic(): Unit =
    throw new NonDeterministicException(this,
                                        "Ordering of entries in a Map may be non-deterministic.")

  override def consistentWithEquals(): Boolean = false

  // Byte size estimation: the entry count plus every key and value.
  override def isRegisterByteSizeObserverCheap(value: m.Map[K, V]): Boolean = false

  override def registerByteSizeObserver(value: m.Map[K, V],
                                        observer: ElementByteSizeObserver): Unit = {
    lc.registerByteSizeObserver(value.size, observer)
    value.foreach {
      case (k, v) =>
        kc.registerByteSizeObserver(k, observer)
        vc.registerByteSizeObserver(v, observer)
    }
  }
}
/**
 * Implicit [[Coder]] instances for Scala primitives, a few standard value types and the
 * standard-library collections.
 *
 * Primitive instances wrap the Beam coder of the corresponding boxed Java type;
 * collection instances are obtained by transforming the element coder(s) into the
 * matching collection coder defined in this file.
 */
trait ScalaCoders {
  // ---- primitives and simple values ----

  implicit def byteCoder: Coder[Byte] =
    Coder.beam(ByteCoder.of().asInstanceOf[BCoder[Byte]])

  implicit def stringCoder: Coder[String] =
    Coder.beam(StringUtf8Coder.of())

  implicit def shortCoder: Coder[Short] =
    Coder.beam(BigEndianShortCoder.of().asInstanceOf[BCoder[Short]])

  implicit def intCoder: Coder[Int] =
    Coder.beam(VarIntCoder.of().asInstanceOf[BCoder[Int]])

  implicit def longCoder: Coder[Long] =
    Coder.beam(BigEndianLongCoder.of().asInstanceOf[BCoder[Long]])

  implicit def floatCoder: Coder[Float] =
    Coder.beam(FloatCoder.of().asInstanceOf[BCoder[Float]])

  implicit def doubleCoder: Coder[Double] =
    Coder.beam(DoubleCoder.of().asInstanceOf[BCoder[Double]])

  implicit def booleanCoder: Coder[Boolean] =
    Coder.beam(BooleanCoder.of().asInstanceOf[BCoder[Boolean]])

  implicit def unitCoder: Coder[Unit] =
    Coder.beam(UnitCoder)

  implicit def nothingCoder: Coder[Nothing] =
    Coder.beam[Nothing](NothingCoder)

  /** `BigInt` is coded by mapping to and from `java.math.BigInteger`. */
  implicit def bigIntCoder: Coder[BigInt] = {
    val javaCoder = Coder.beam(BigIntegerCoder.of())
    Coder.xmap(javaCoder)(BigInt.apply, _.bigInteger)
  }

  /** `BigDecimal` is coded by mapping to and from `java.math.BigDecimal`. */
  implicit def bigDecimalCoder: Coder[BigDecimal] = {
    val javaCoder = Coder.beam(BigDecimalCoder.of())
    Coder.xmap(javaCoder)(BigDecimal.apply, _.bigDecimal)
  }

  // ---- options ----

  /** Coder for `Option[T]` and its subtypes; the derived coder is cast to the requested `S[T]`. */
  implicit def optionCoder[T, S[_] <: Option[_]](implicit c: Coder[T]): Coder[S[T]] = {
    val derived = Coder.transform(c)(elem => Coder.beam(new OptionCoder[T](elem)))
    derived.asInstanceOf[Coder[S[T]]]
  }

  implicit def noneCoder: Coder[None.type] =
    optionCoder[Nothing, Option](nothingCoder).asInstanceOf[Coder[None.type]]

  // ---- collections ----

  implicit def bitSetCoder: Coder[BitSet] =
    Coder.beam(new BitSetCoder)

  implicit def seqCoder[T: Coder]: Coder[Seq[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new SeqCoder[T](elem)))

  // TODO: proper chunking implementation
  implicit def iterableCoder[T: Coder]: Coder[Iterable[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new IterableCoder[T](elem)))

  /** Throwables are coded through `Coder.kryo`. */
  implicit def throwableCoder[T <: Throwable: ClassTag]: Coder[T] =
    Coder.kryo[T]

  // specialized coder. Since `::` is a case class, Magnolia would derive an incorrect one...
  implicit def listCoder[T: Coder]: Coder[List[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new ListCoder[T](elem)))

  implicit def traversableOnceCoder[T: Coder]: Coder[TraversableOnce[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new TraversableOnceCoder[T](elem)))

  implicit def setCoder[T: Coder]: Coder[Set[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new SetCoder[T](elem)))

  implicit def vectorCoder[T: Coder]: Coder[Vector[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new VectorCoder[T](elem)))

  implicit def arrayBufferCoder[T: Coder]: Coder[m.ArrayBuffer[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new ArrayBufferCoder[T](elem)))

  implicit def bufferCoder[T: Coder]: Coder[m.Buffer[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new BufferCoder[T](elem)))

  implicit def arrayCoder[T: Coder: ClassTag]: Coder[Array[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new ArrayCoder[T](elem)))

  /** Byte arrays use Beam's `ByteArrayCoder` directly. */
  implicit def arrayByteCoder: Coder[Array[Byte]] =
    Coder.beam(ByteArrayCoder.of())

  implicit def mutableMapCoder[K: Coder, V: Coder]: Coder[m.Map[K, V]] =
    Coder.transform(Coder[K])(keyCoder =>
      Coder.transform(Coder[V])(valueCoder =>
        Coder.beam(new MutableMapCoder[K, V](keyCoder, valueCoder))))

  implicit def mapCoder[K: Coder, V: Coder]: Coder[Map[K, V]] =
    Coder.transform(Coder[K])(keyCoder =>
      Coder.transform(Coder[V])(valueCoder =>
        Coder.beam(new MapCoder[K, V](keyCoder, valueCoder))))

  implicit def sortedSetCoder[T: Coder: Ordering]: Coder[SortedSet[T]] =
    Coder.transform(Coder[T])(elem => Coder.beam(new SortedSetCoder[T](elem)))

  // implicit def enumerationCoder[E <: Enumeration]: Coder[E#Value] = ???
}