All downloads are free. Search and download functionality uses the official Maven repository.

com.spotify.scio.extra.json.JsonIO.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2019 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.extra.json

import com.spotify.scio.ScioContext
import com.spotify.scio.io.{ScioIO, Tap, TapOf, TextIO}
import com.spotify.scio.util.ScioUtil
import com.spotify.scio.values.SCollection
import com.spotify.scio.coders.Coder
import io.circe.Printer
import io.circe.parser._
import io.circe.syntax._
import com.spotify.scio.io.TapT
import com.spotify.scio.util.FilenamePolicySupplier
import org.apache.beam.sdk.io.Compression

/**
 * JSON-backed [[ScioIO]] that keeps one circe-encoded value of `T` per text record at `path`.
 *
 * Reading and writing of the text itself is delegated to `TextIO`; this class only adds
 * the JSON encoding and decoding steps around it.
 *
 * @param path location handed to `TextIO` for both reads and writes
 * @tparam T element type; requires a circe `Encoder` and `Decoder` plus a Scio `Coder`
 */
final case class JsonIO[T: Encoder: Decoder: Coder](path: String) extends ScioIO[T] {
  override type ReadP = JsonIO.ReadParam
  override type WriteP = JsonIO.WriteParam
  override val tapT: TapT.Aux[T, T] = TapOf[T]

  /** Parses one JSON string into `T`, rethrowing the circe error when parsing or decoding fails. */
  private def decodeJson(json: String): T =
    decode[T](json) match {
      case Right(value) => value
      case Left(error)  => throw error
    }

  /** Reads text records from `path` through `TextIO` and decodes each of them as JSON. */
  override protected def read(sc: ScioContext, params: ReadP): SCollection[T] = {
    val rawText = sc.read(TextIO(path))(params)
    rawText.map(json => decodeJson(json))
  }

  /**
   * Renders every element as JSON with the configured printer, writes the resulting
   * strings through `TextIO`, and returns a tap over the written data.
   */
  override protected def write(data: SCollection[T], params: WriteP): Tap[T] = {
    val rendered = data.map(element => params.printer.print(element.asJson))
    // `params` is adapted to TextIO's write parameters by the implicit conversion
    // defined in the `WriteParam` companion.
    rendered.write(TextIO(path))(params)
    tap(JsonIO.ReadParam(params))
  }

  /** Builds a tap that can either iterate the decoded values directly or re-open them in a context. */
  override def tap(params: ReadP): Tap[T] = new Tap[T] {
    override def open(sc: ScioContext): SCollection[T] =
      JsonIO(path).read(sc, params)

    override def value: Iterator[T] =
      TextIO
        .textFile(ScioUtil.filePattern(path, params.suffix))
        .map(json => decodeJson(json))
  }
}

/** Companion of the `JsonIO` case class: declares its read/write parameter types and their defaults. */
object JsonIO {

  // Reading reuses TextIO's read parameters as-is: both the type and its companion are aliased,
  // so `JsonIO.ReadParam(...)` resolves to `TextIO.ReadParam(...)`.
  type ReadParam = TextIO.ReadParam
  val ReadParam = TextIO.ReadParam

  /** Default values for the `WriteParam` case class, plus its conversion to `TextIO.WriteParam`. */
  object WriteParam {
    // NOTE(review): 0 presumably leaves the shard count up to the runner — confirm against TextIO/Beam.
    val DefaultNumShards: Int = 0
    val DefaultSuffix: String = ".json"
    val DefaultCompression: Compression = Compression.UNCOMPRESSED
    // Compact circe printer (no whitespace between tokens).
    val DefaultPrinter: Printer = Printer.noSpaces
    // The following defaults are null; they are forwarded unchanged to TextIO.WriteParam below.
    // NOTE(review): null presumably means "not set" and is interpreted by TextIO — confirm.
    val DefaultFilenamePolicySupplier: FilenamePolicySupplier = null
    val DefaultShardNameTemplate: String = null
    val DefaultPrefix: String = null
    val DefaultTempDirectory: String = null

    /**
     * Adapts JSON write parameters to the `TextIO.WriteParam` used for the underlying text write.
     *
     * Every field except `printer` is copied across; header and footer are always `None`.
     * Visible only inside `JsonIO`.
     */
    implicit private[JsonIO] def textWriteParam(params: WriteParam): TextIO.WriteParam =
      TextIO.WriteParam(
        suffix = params.suffix,
        numShards = params.numShards,
        compression = params.compression,
        header = None, // no header in json
        footer = None, // no footer in json
        filenamePolicySupplier = params.filenamePolicySupplier,
        prefix = params.prefix,
        shardNameTemplate = params.shardNameTemplate,
        tempDirectory = params.tempDirectory
      )
  }

  /**
   * Parameters for writing JSON. Each field defaults to the matching `Default*` constant
   * in the `WriteParam` companion object.
   *
   * @param suffix forwarded to `TextIO.WriteParam.suffix`
   * @param numShards forwarded to `TextIO.WriteParam.numShards`
   * @param compression forwarded to `TextIO.WriteParam.compression`
   * @param printer circe printer used to render each element as a JSON string
   * @param filenamePolicySupplier forwarded to `TextIO.WriteParam.filenamePolicySupplier`
   * @param prefix forwarded to `TextIO.WriteParam.prefix`
   * @param shardNameTemplate forwarded to `TextIO.WriteParam.shardNameTemplate`
   * @param tempDirectory forwarded to `TextIO.WriteParam.tempDirectory`
   */
  final case class WriteParam private (
    suffix: String = WriteParam.DefaultSuffix,
    numShards: Int = WriteParam.DefaultNumShards,
    compression: Compression = WriteParam.DefaultCompression,
    printer: Printer = WriteParam.DefaultPrinter,
    filenamePolicySupplier: FilenamePolicySupplier = WriteParam.DefaultFilenamePolicySupplier,
    prefix: String = WriteParam.DefaultPrefix,
    shardNameTemplate: String = WriteParam.DefaultShardNameTemplate,
    tempDirectory: String = WriteParam.DefaultTempDirectory
  )
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy