All downloads are free. The search and download features use the official Maven repository.

com.spotify.scio.extra.bigquery.ToTableRow.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2019 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.extra.bigquery

import com.spotify.scio.extra.bigquery.AvroConverters.AvroConversionException

import java.math.{BigDecimal => JBigDecimal}
import java.nio.ByteBuffer
import java.util

import com.spotify.scio.bigquery.TableRow
import org.apache.avro.Schema
import org.apache.avro.generic.GenericData.EnumSymbol
import org.apache.avro.generic.{GenericFixed, IndexedRecord}
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.BaseEncoding

import java.time.format.DateTimeFormatter
import java.time.{Instant, LocalDate, LocalTime, ZoneOffset}
import scala.jdk.CollectionConverters._

/**
 * Constants and formatters used by the `ToTableRow` trait when rendering Avro values as
 * BigQuery-compatible strings.
 *
 * Every member is a `lazy val`, so an encoder or formatter is only instantiated the first time
 * it is referenced.
 */
private object ToTableRow {
  // Avro schema property that selects how byte values are encoded ("BASE64" or "HEX").
  private lazy val EncodingPropName: String = "bigquery.bytes.encoder"
  private lazy val Base64Encoding: BaseEncoding = BaseEncoding.base64()
  private lazy val HexEncoding: BaseEncoding = BaseEncoding.base16()

  // DATE: YYYY-[M]M-[D]D
  private lazy val JodaLocalDateFormatter =
    org.joda.time.format.DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC()
  private lazy val LocalDateFormatter =
    DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneOffset.UTC)

  // TIME: [H]H:[M]M:[S]S[.DDDDDD]
  private lazy val JodaLocalTimeFormatter =
    org.joda.time.format.DateTimeFormat.forPattern("HH:mm:ss.SSSSSS")
  private lazy val LocalTimeFormatter =
    DateTimeFormatter.ofPattern("HH:mm:ss.SSSSSS")

  // TIMESTAMP: YYYY-[M]M-[D]D[( |T)[H]H:[M]M:[S]S[.DDDDDD]][time zone]
  // The pattern emits no zone/offset, so both formatters are pinned to UTC: the printed
  // wall-clock fields then always denote the same instant regardless of the zone carried by
  // the value being formatted.
  private lazy val JodaTimestampFormatter =
    org.joda.time.format.DateTimeFormat
      .forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS")
      .withZoneUTC()
  private lazy val TimestampFormatter =
    DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS").withZone(ZoneOffset.UTC)
}

/**
 * Converts an [[org.apache.avro.generic.IndexedRecord IndexedRecord]] into a
 * [[com.spotify.scio.bigquery.TableRow TableRow]].
 *
 * The trait supplies the per-field conversion; nested records are delegated back to
 * `AvroConverters.toTableRow`.
 */
private[bigquery] trait ToTableRow {
  import ToTableRow._

  /**
   * Converts one Avro field value into the value stored in the resulting `TableRow`.
   *
   *   - character sequences, enums and `BigDecimal`s become strings
   *   - other numbers and booleans are passed through unchanged
   *   - fixed/bytes values are encoded as strings (see `encodeByteArray`)
   *   - maps become a list of `key`/`value` rows, iterables become a list of converted items
   *   - nested records are converted recursively
   *   - `java.time` and Joda date/time values are rendered with the companion's formatters
   *
   * @param fieldValue
   *   the value to convert
   * @param field
   *   the Avro field the value belongs to; its schema is consulted for the byte encoding
   * @throws AvroConversionException
   *   if the value is `null` or of a type that has no mapping
   */
  private[bigquery] def toTableRowField(fieldValue: Any, field: Schema.Field): Any =
    fieldValue match {
      // Handled explicitly so a null element surfaces as a conversion error instead of the
      // NullPointerException that `fieldValue.getClass` would raise in the fallback case.
      case null =>
        throw AvroConversionException(
          s"ToTableRow conversion failed: null value in field ${field.name()}"
        )
      case x: CharSequence => x.toString
      case x: EnumSymbol   => x.toString
      case x: Enum[_]      => x.name()
      // Must precede the generic Number case: BigDecimal is rendered as a string.
      case x: JBigDecimal             => x.toString
      case x: Number                  => x
      case x: Boolean                 => x
      case x: GenericFixed            => encodeByteArray(x.bytes(), field.schema())
      case x: ByteBuffer              => encodeByteArray(toByteArray(x), field.schema())
      case x: util.Map[_, _]          => toTableRowFromMap(x.asScala, field)
      case x: java.lang.Iterable[_]   => toTableRowFromIterable(x.asScala, field)
      case x: IndexedRecord           => AvroConverters.toTableRow(x)
      case x: LocalDate               => LocalDateFormatter.format(x)
      case x: LocalTime               => LocalTimeFormatter.format(x)
      case x: Instant                 => TimestampFormatter.format(x)
      case x: org.joda.time.LocalDate => JodaLocalDateFormatter.print(x)
      case x: org.joda.time.LocalTime => JodaLocalTimeFormatter.print(x)
      case x: org.joda.time.DateTime  => JodaTimestampFormatter.print(x)
      case other =>
        throw AvroConversionException(
          s"ToTableRow conversion failed: " +
            s"could not match ${other.getClass}"
        )
    }

  /**
   * Tells whether `item` is itself a container (Java or Scala iterable/map).
   *
   * Elements of a collection wrapped with `.asScala` are still the raw Java objects, so the Java
   * interfaces have to be tested in addition to the Scala ones.
   */
  private def isNestedCollection(item: Any): Boolean =
    item match {
      case _: java.lang.Iterable[_] | _: util.Map[_, _] => true
      case _: Iterable[_]                               => true // also covers Scala maps
      case _                                            => false
    }

  /**
   * Converts every element of `iterable` with `toTableRowField` and returns them as a Java list.
   *
   * @throws AvroConversionException
   *   if an element is itself an iterable or a map
   */
  private def toTableRowFromIterable(iterable: Iterable[Any], field: Schema.Field): util.List[_] =
    iterable
      .map { item =>
        if (isNestedCollection(item)) {
          throw AvroConversionException(
            s"ToTableRow conversion failed for item $item: " +
              s"iterable and map types not supported"
          )
        }
        toTableRowField(item, field)
      }
      .toList
      .asJava

  /**
   * Converts map entries into a Java list of rows, each holding the converted entry under the
   * `key` and `value` columns.
   */
  private def toTableRowFromMap(map: Iterable[(Any, Any)], field: Schema.Field): util.List[_] =
    map
      .map { case (k, v) =>
        new TableRow()
          .set("key", toTableRowField(k, field))
          .set("value", toTableRowField(v, field))
      }
      .toList
      .asJava

  /**
   * Encodes `bytes` as a string using the encoding named by the schema's
   * `bigquery.bytes.encoder` property: `BASE64` (also the default when the property is absent)
   * or `HEX`.
   *
   * @throws AvroConversionException
   *   if the property names any other encoding
   */
  private def encodeByteArray(bytes: Array[Byte], fieldSchema: Schema): String =
    Option(fieldSchema.getProp(EncodingPropName)) match {
      case None | Some("BASE64") => Base64Encoding.encode(bytes)
      case Some("HEX")           => HexEncoding.encode(bytes)
      case Some(encoding) =>
        throw AvroConversionException(s"Unsupported encoding $encoding")
    }

  /**
   * Copies the bytes between index 0 and the buffer's limit into a new array.
   *
   * The copy is taken through a read-only duplicate that is rewound first, so the caller's
   * buffer position is neither honoured nor modified.
   */
  private def toByteArray(buffer: ByteBuffer): Array[Byte] = {
    val view = buffer.asReadOnlyBuffer()
    val bytes = new Array[Byte](view.limit())
    view.rewind()
    view.get(bytes)
    bytes
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy