All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metamx.tranquility.druid.DruidRollup.scala Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.metamx.tranquility.druid

import io.druid.data.input.impl.SpatialDimensionSchema
import io.druid.data.input.impl.TimestampSpec
import io.druid.granularity.QueryGranularity
import io.druid.query.aggregation.AggregatorFactory
import java.{util => ju}
import scala.collection.JavaConverters._

/**
  * Describes rollup (dimensions, aggregators, index granularity) desired for a Druid datasource. Java users should use
  * the create methods on [[DruidRollup$]], as those accept Java collections rather than Scala ones.
  *
  * See [[DruidDimensions.specific]], [[DruidDimensions.schemaless]], and [[DruidDimensions.schemalessWithExclusions]]
  * for three common ways of creating druid dimensions objects.
  */
class DruidRollup(
  val dimensions: DruidDimensions,
  val aggregators: IndexedSeq[AggregatorFactory],
  val indexGranularity: QueryGranularity,
  val isRollup: Boolean = true
)
{
  // Field names that can never be treated as string dimensions in schemaless mode:
  // every aggregator input field plus every aggregator output name.
  private val additionalExclusions: Set[String] =
    aggregators.flatMap(aggregator => aggregator.requiredFields().asScala :+ aggregator.getName).toSet

  validate()

  /**
    * Checks that no column name is used more than once across the internal time column,
    * the known dimensions, and the metric (aggregator output) names.
    *
    * @throws IllegalArgumentException if any column name appears more than once
    */
  def validate(): Unit = {
    val allColumnNames =
      Seq(DruidRollup.InternalTimeColumnName) ++
        dimensions.knownDimensions ++
        aggregators.map(_.getName)

    val duplicateColumns = allColumnNames
      .groupBy(identity)
      .collect { case (columnName, occurrences) if occurrences.size > 1 => columnName }
      .toSet

    if (duplicateColumns.nonEmpty) {
      throw new IllegalArgumentException("Duplicate columns: %s" format duplicateColumns.mkString(", "))
    }
  }

  /**
    * Whether the named field would be indexed as a string dimension. With specific dimensions this
    * is a simple membership test; with schemaless dimensions, any field qualifies unless it is the
    * timestamp column, claimed by an aggregator, or explicitly excluded.
    */
  def isStringDimension(timestampSpec: TimestampSpec, fieldName: String) = {
    dimensions match {
      case specific: SpecificDruidDimensions =>
        specific.dimensionsSet.contains(fieldName)
      case SchemalessDruidDimensions(excluded, _) =>
        !(fieldName == timestampSpec.getTimestampColumn ||
          additionalExclusions.contains(fieldName) ||
          excluded.contains(fieldName))
    }
  }
}

/**
  * Base type describing the dimensions of a Druid datasource. The two implementations are
  * [[SpecificDruidDimensions]] (an explicit list of dimension names) and
  * [[SchemalessDruidDimensions]] (everything except an exclusion list).
  */
sealed abstract class DruidDimensions
{
  /** Java-friendly map representation of this dimension spec (for handing to Druid APIs). */
  def specMap: ju.Map[String, AnyRef]

  /** Dimension names known up-front; schemaless implementations only know their spatial dimensions. */
  def knownDimensions: Seq[String]

  /** Spatial dimensions attached to this dimension set. */
  def spatialDimensions: Seq[DruidSpatialDimension]

  /** Returns a copy with the given spatial dimensions. Convenience for Java users. */
  def withSpatialDimensions(xs: java.util.List[DruidSpatialDimension]): DruidDimensions
}

/**
  * Base type for Druid spatial dimensions, which are built from one or more field names.
  */
sealed abstract class DruidSpatialDimension
{
  /** Underlying Druid schema object for this spatial dimension. */
  def schema: SpatialDimensionSchema
}

case class SingleFieldDruidSpatialDimension(name: String) extends DruidSpatialDimension
{
  /** Druid schema for a spatial dimension backed by a single field; no extra field names. */
  override def schema = new SpatialDimensionSchema(name, ju.Collections.emptyList[String]())
}

case class MultipleFieldDruidSpatialDimension(name: String, fieldNames: Seq[String]) extends DruidSpatialDimension
{
  /** Druid schema for a spatial dimension combining several field names under one name. */
  override def schema = {
    val javaFieldNames = fieldNames.asJava
    new SpatialDimensionSchema(name, javaFieldNames)
  }
}

case class SpecificDruidDimensions(
  dimensions: Seq[String],
  spatialDimensions: Seq[DruidSpatialDimension] = Nil
) extends DruidDimensions
{
  /** Set view of the dimension names, used for fast membership checks. */
  val dimensionsSet = dimensions.toSet

  // Lazily built Java map; transient so a cached map is not dragged along on serialization.
  @transient override lazy val specMap: ju.Map[String, AnyRef] = {
    val entries = Map[String, AnyRef](
      "dimensions" -> dimensions.toIndexedSeq.asJava,
      "spatialDimensions" -> spatialDimensions.map(_.schema).asJava
    )
    entries.asJava
  }

  /** All declared dimension names, including the names of the spatial dimensions. */
  override def knownDimensions: Seq[String] =
    dimensions ++ spatialDimensions.map(_.schema.getDimName)

  /**
    * Convenience method for Java users. Scala users should use "copy".
    */
  override def withSpatialDimensions(xs: java.util.List[DruidSpatialDimension]) =
    copy(spatialDimensions = xs.asScala.toIndexedSeq)
}

case class SchemalessDruidDimensions(
  dimensionExclusions: Set[String],
  spatialDimensions: Seq[DruidSpatialDimension] = Nil
) extends DruidDimensions
{
  // Leaving "dimensions" out of the map causes the Druid parser to go schemaless;
  // only the exclusions and spatial dimensions are declared.
  @transient override lazy val specMap: ju.Map[String, AnyRef] = {
    val entries = Map[String, AnyRef](
      "dimensionExclusions" -> dimensionExclusions.toSeq.asJava,
      "spatialDimensions" -> spatialDimensions.map(_.schema).asJava
    )
    entries.asJava
  }

  /** Only the spatial dimension names are known up-front when running schemaless. */
  override def knownDimensions: Seq[String] =
    spatialDimensions.map(_.schema.getDimName)

  /**
    * Convenience method for Java users. Scala users should use "copy".
    */
  override def withSpatialDimensions(xs: java.util.List[DruidSpatialDimension]) =
    copy(spatialDimensions = xs.asScala.toIndexedSeq)
}

object SchemalessDruidDimensions
{
  /**
    * Creates schemaless dimensions with the given exclusions and no spatial dimensions.
    */
  def apply(
    dimensionExclusions: Seq[String]
  ): SchemalessDruidDimensions =
  {
    apply(dimensionExclusions, Vector.empty)
  }

  /**
    * Creates schemaless dimensions with the given exclusions and spatial dimensions.
    */
  def apply(
    dimensionExclusions: Seq[String],
    spatialDimensions: IndexedSeq[DruidSpatialDimension]
  ): SchemalessDruidDimensions =
  {
    new SchemalessDruidDimensions(dimensionExclusions.toSet, spatialDimensions)
  }
}

object DruidRollup
{
  // Druid's reserved internal time column; it may never double as a dimension or metric name.
  private val InternalTimeColumnName = "__time"

  /**
    * Builder for Scala users. Accepts a druid dimensions object and can be used to build rollups based on specific
    * or schemaless dimensions.
    */
  def apply(
    dimensions: DruidDimensions,
    aggregators: Seq[AggregatorFactory],
    indexGranularity: QueryGranularity,
    isRollup: Boolean
  ) =
  {
    new DruidRollup(dimensions, aggregators.toIndexedSeq, indexGranularity, isRollup)
  }

  /**
    * Builder for Java users. Accepts a druid dimensions object and can be used to build rollups based on specific
    * or schemaless dimensions.
    *
    * See [[DruidDimensions.specific]], [[DruidDimensions.schemaless]], and [[DruidDimensions.schemalessWithExclusions]]
    * for three common ways of creating druid dimensions objects.
    */
  def create(
    dimensions: DruidDimensions,
    aggregators: java.util.List[AggregatorFactory],
    indexGranularity: QueryGranularity,
    isRollup: Boolean
  ): DruidRollup =
  {
    new DruidRollup(
      dimensions,
      aggregators.asScala.toIndexedSeq,
      indexGranularity,
      isRollup
    )
  }

  /**
    * Builder for Java users. Accepts dimensions as strings, and creates a rollup with those specific dimensions.
    */
  def create(
    dimensions: java.util.List[String],
    aggregators: java.util.List[AggregatorFactory],
    indexGranularity: QueryGranularity,
    isRollup: Boolean
  ): DruidRollup =
  {
    new DruidRollup(
      // Copy with toIndexedSeq: a bare .asScala is a live view of the caller's mutable Java
      // list, so later mutations would silently change this rollup's dimensions.
      SpecificDruidDimensions(dimensions.asScala.toIndexedSeq, Vector.empty),
      aggregators.asScala.toIndexedSeq,
      indexGranularity,
      isRollup
    )
  }
}

object DruidDimensions
{
  /**
    * Creates a druid dimensions object representing a specific set of dimensions. Only these fields will be
    * indexed as dimensions.
    */
  def specific(dimensions: java.util.List[String]): DruidDimensions = {
    // Copy with toIndexedSeq: a bare .asScala is a live view of the caller's mutable Java
    // list, so later mutations would silently change these dimensions.
    SpecificDruidDimensions(dimensions.asScala.toIndexedSeq, Vector.empty)
  }

  /**
    * Creates a druid dimensions object representing schemaless dimensions. All fields that are not part of an
    * aggregator will be indexed as dimensions.
    */
  def schemaless(): DruidDimensions = {
    SchemalessDruidDimensions(Vector.empty, Vector.empty)
  }

  /**
    * Creates a druid dimensions object representing schemaless dimensions. All fields that are not part of an
    * aggregator, and not in the exclusions list, will be indexed as dimensions.
    */
  def schemalessWithExclusions(dimensionExclusions: java.util.List[String]): DruidDimensions = {
    // toSet already copies out of the Java-backed wrapper, so no extra copy is needed here.
    SchemalessDruidDimensions(dimensionExclusions.asScala.toSet, Vector.empty)
  }
}

object DruidSpatialDimension
{
  /**
    * Builder for Java users: a spatial dimension backed by a single field.
    */
  def singleField(name: String): DruidSpatialDimension = {
    new SingleFieldDruidSpatialDimension(name)
  }

  /**
    * Builder for Java users: a spatial dimension combining several field names under one name.
    */
  def multipleField(name: String, fieldNames: java.util.List[String]): DruidSpatialDimension = {
    // Copy with toIndexedSeq: a bare .asScala is a live view of the caller's mutable Java
    // list, so later mutations would silently change this dimension's field names.
    new MultipleFieldDruidSpatialDimension(name, fieldNames.asScala.toIndexedSeq)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy