All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.scalding.parquet.HasColumnProjection.scala Maven / Gradle / Ivy

There is a newer version: 0.16.1-RC3
Show newest version
package com.twitter.scalding.parquet

import org.slf4j.LoggerFactory

object HasColumnProjection {
  /** Logger shared by everything that mixes in the HasColumnProjection trait. */
  val LOG: org.slf4j.Logger = LoggerFactory.getLogger(getClass)

  /**
   * Validates that a single column projection glob does not contain the `;` character.
   *
   * @param glob the glob to validate
   * @throws IllegalArgumentException (raised by `require`) if `glob` contains a `;`
   */
  def requireNoSemiColon(glob: String): Unit =
    require(glob.indexOf(';') < 0, "A column projection glob cannot contain a ; character")
}

trait HasColumnProjection {
  import com.twitter.scalding.parquet.HasColumnProjection._

  /**
   * Legacy way of declaring the columns to read. Superseded by withColumnProjections,
   * whose glob syntax is different.
   *
   * The glob format accepted here is documented at:
   * https://github.com/apache/parquet-mr/blob/3df3372a1ee7b6ea74af89f53a614895b8078609/parquet_cascading.md#2-projection-pushdown
   * (this is a different link from the one given on withColumnProjections)
   *
   * That document combines several globs into one string with a ; character.
   * Here every glob is its own element of the returned Set; the set is joined
   * on the ; character later on your behalf.
   */
  @deprecated(message = "Use withColumnProjections, which uses a different glob syntax", since = "0.15.1")
  def withColumns: Set[String] = Set.empty

  /**
   * Declares the columns to read. The glob format is documented at:
   * https://github.com/apache/parquet-mr/blob/master/parquet_cascading.md#21-projection-pushdown-with-thriftscrooge-records
   *
   * That document combines several globs into one string with a ; character.
   * Here every glob is its own element of the returned Set; the set is joined
   * on the ; character later on your behalf.
   */
  def withColumnProjections: Set[String] = Set.empty

  /**
   * Resolves the configured projection globs into a ColumnProjectionString
   * (Parquet accepts globs separated by the ; character).
   *
   * @return None when neither withColumns nor withColumnProjections supplies a glob,
   *         a DeprecatedColumnProjectionString when withColumns does, and
   *         a StrictColumnProjectionString when withColumnProjections does
   * @throws IllegalArgumentException (raised by `require`) if both sets are non-empty,
   *         or if any glob contains a ; character
   */
  protected[parquet] final def columnProjectionString: Option[ColumnProjectionString] = {
    val legacyGlobs = withColumns
    val strictGlobs = withColumnProjections

    // The two syntaxes are mutually exclusive: at most one of the sets may hold globs.
    require(!(legacyGlobs.nonEmpty && strictGlobs.nonEmpty),
      "Cannot provide both withColumns and withColumnProjections")

    // A ; inside a glob would be indistinguishable from the separator used when joining.
    Seq(legacyGlobs, strictGlobs).foreach(_.foreach(requireNoSemiColon))

    if (legacyGlobs.isEmpty) {
      if (strictGlobs.isEmpty) None
      else Some(StrictColumnProjectionString(strictGlobs))
    } else {
      LOG.warn("withColumns is deprecated. Please use withColumnProjections, which uses a different glob syntax")
      Some(DeprecatedColumnProjectionString(legacyGlobs))
    }
  }
}

/**
 * A set of column projection globs, tagged by which glob syntax they are written in.
 */
sealed trait ColumnProjectionString {
  /** The individual globs, one per element. */
  def globStrings: Set[String]

  /** All globs joined into a single string, separated by the ; character. */
  def asSemicolonString: String = globStrings.iterator.mkString(";")
}

/** Globs supplied through the deprecated withColumns member. */
case class DeprecatedColumnProjectionString(globStrings: Set[String])
  extends ColumnProjectionString

/** Globs supplied through withColumnProjections. */
case class StrictColumnProjectionString(globStrings: Set[String])
  extends ColumnProjectionString




© 2015 - 2025 Weber Informatics LLC | Privacy Policy