All downloads are free. Search and download functionality uses the official Maven repository.

com.spotify.scio.extra.csv.syntax.SCollectionSyntax.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2020 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.extra.csv.syntax

import com.spotify.scio.annotations.experimental
import com.spotify.scio.coders.Coder
import com.spotify.scio.extra.csv.CsvIO
import com.spotify.scio.io.ClosedTap
import com.spotify.scio.util.FilenamePolicySupplier
import com.spotify.scio.values.SCollection
import kantan.csv.{CsvConfiguration, HeaderDecoder, HeaderEncoder}
import org.apache.beam.sdk.io.Compression
import org.apache.beam.sdk.io.FileIO.ReadableFile
import org.apache.beam.sdk.transforms.ParDo

/**
 * Groups the implicit classes that add CSV read and write methods to
 * [[SCollection]].
 */
trait SCollectionSyntax {

  /**
   * Adds `saveAsCsvFile` to an [[SCollection]] of `T`; the method itself
   * requires an implicit kantan `HeaderEncoder[T]`.
   */
  implicit final class WritableCsvSCollection[T](private val self: SCollection[T]) {

    /**
     * Writes the wrapped collection through `CsvIO.Write[T]` created for `path`.
     *
     * Every argument other than `path` is forwarded unchanged into a
     * `CsvIO.WriteParam`, and every default value is taken from
     * `CsvIO.WriteParam`.
     *
     * @param path                   passed to `CsvIO.Write`
     * @param suffix                 forwarded as `WriteParam.suffix`
     * @param csvConfig              forwarded as `WriteParam.csvConfiguration`
     * @param numShards              forwarded as `WriteParam.numShards`
     * @param compression            forwarded as `WriteParam.compression`
     * @param shardNameTemplate      forwarded as `WriteParam.shardNameTemplate`
     * @param tempDirectory          forwarded as `WriteParam.tempDirectory`
     * @param filenamePolicySupplier forwarded as `WriteParam.filenamePolicySupplier`
     * @param prefix                 forwarded as `WriteParam.prefix`
     * @param enc                    kantan header encoder for `T`
     * @return the [[ClosedTap]] produced by the underlying `write` call
     */
    @experimental
    def saveAsCsvFile(
      path: String,
      suffix: String = CsvIO.WriteParam.DefaultSuffix,
      csvConfig: CsvConfiguration = CsvIO.WriteParam.DefaultCsvConfig,
      numShards: Int = CsvIO.WriteParam.DefaultNumShards,
      compression: Compression = CsvIO.WriteParam.DefaultCompression,
      shardNameTemplate: String = CsvIO.WriteParam.DefaultShardNameTemplate,
      tempDirectory: String = CsvIO.WriteParam.DefaultTempDirectory,
      filenamePolicySupplier: FilenamePolicySupplier =
        CsvIO.WriteParam.DefaultFilenamePolicySupplier,
      prefix: String = CsvIO.WriteParam.DefaultPrefix
    )(implicit enc: HeaderEncoder[T]): ClosedTap[Nothing] = {
      // The IO is built before its parameters, same as in a direct nested call.
      val csvWrite = CsvIO.Write[T](path)
      val writeParam = CsvIO.WriteParam(
        suffix = suffix,
        csvConfiguration = csvConfig,
        numShards = numShards,
        compression = compression,
        shardNameTemplate = shardNameTemplate,
        tempDirectory = tempDirectory,
        filenamePolicySupplier = filenamePolicySupplier,
        prefix = prefix
      )
      self.write(csvWrite)(writeParam)
    }
  }

  /** Adds `readCsv` to an [[SCollection]] of Beam `ReadableFile`s. */
  implicit final class ReadableCsvFileSCollection(private val self: SCollection[ReadableFile]) {

    /**
     * Applies a `ParDo` over `CsvIO.ReadDoFn[T]` to the wrapped files, under
     * the transform name "Read CSV".
     *
     * @param csvConfiguration handed to `CsvIO.ReadDoFn`
     * @tparam T element type of the result; needs a kantan `HeaderDecoder`
     *           and a Scio `Coder`
     * @return the collection resulting from the applied transform
     */
    @experimental
    def readCsv[T: HeaderDecoder: Coder](csvConfiguration: CsvConfiguration): SCollection[T] = {
      // Name the step first, then attach the parsing transform.
      val named = self.withName("Read CSV")
      named.applyTransform(ParDo.of(CsvIO.ReadDoFn[T](csvConfiguration)))
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy