org.sparklinedata.druid.DruidQuerySpec.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.sparklinedata.druid

import java.io.InputStream
import java.util.Locale

import org.apache.spark.sql.sources.druid.{DruidQueryResultIterator, SearchQueryResultIterator}

import scala.collection.breakOut
import org.apache.spark.sql.types.{DataType, DoubleType, LongType, StringType}
import org.joda.time.Interval
import org.sparklinedata.druid.client.QueryResultRow
import org.sparklinedata.druid.metadata.{DruidDataSource, DruidDataType, DruidSegmentInfo}

sealed trait ExtractionFunctionSpec {
  val `type`: String
}

/**
 * In SQL this is a 'like' or 'rlike' predicate on a column being grouped on.
 *
 * @param `type`
 * @param expr
 */
case class RegexExtractionFunctionSpec(val `type`: String, val expr: String)
  extends ExtractionFunctionSpec
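
// A minimal sketch (dimension and pattern are made up): extract the first
// path segment of a `page` dimension with a Druid 'regex' extraction fn.
//
//   val firstSegment = RegexExtractionFunctionSpec("regex", "^/([^/]+)/")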

/**
 * In SQL this is a grouping expression of the form 'if col rlike regex then regex else null'.
 *
 * @param `type`
 * @param expr
 */
case class PartialExtractionFunctionSpec(val `type`: String, val expr: String)
  extends ExtractionFunctionSpec

/**
 * In SQL this is a 'contains' predicate on a column being grouped on.
 *
 * @param `type`
 * @param query
 */
case class SearchQueryExtractionFunctionSpec(val `type`: String, val query: String)
  extends ExtractionFunctionSpec

/**
 * In SQL these are withTimeZone and field-extraction functions applied to the
 * time dimension column. Time functions are assumed to be expressed using the
 * [[https://github.com/SparklineData/spark-datetime SparklineData spark-datetime package]].
 *
 * @param `type`
 * @param format
 * @param timeZone
 * @param locale
 */
case class TimeFormatExtractionFunctionSpec(val `type`: String,
                                            val format: String,
                                            val timeZone: Option[String],
                                            val locale: Option[String])
  extends ExtractionFunctionSpec {
  def this(format: String,
           timeZone: Option[String]) =
    this("timeFormat", format, timeZone, Some(Locale.getDefault.toString))
}
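
// Sketch (format and zone are made up): bucket the time dimension by hour of
// day in a fixed zone; the auxiliary constructor defaults the locale.
//
//   val hourOfDay = new TimeFormatExtractionFunctionSpec("HH", Some("America/New_York"))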

case class TimeParsingExtractionFunctionSpec(val `type`: String,
                                             val timeFormat: String,
                                             val resultFormat: String)
  extends ExtractionFunctionSpec {
  def this(timeFormat: String, resultFormat: String) =
    this("time", timeFormat, resultFormat)
}

case class JavaScriptExtractionFunctionSpec(val `type`: String,
                                            val `function`: String,
                                            val injective: Boolean = false)
  extends ExtractionFunctionSpec {
  def this(fn: String) = this("javascript", fn)
}

case class LookUpMap(val `type`: String, val `map`: Map[String, String])

case class InExtractionFnSpec(val `type`: String, lookup: LookUpMap)
  extends ExtractionFunctionSpec {
  def this(valLst: List[String]) = this("lookup",
    LookUpMap("map", valLst.map(x => (x, "true"))(breakOut)))
}
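
// Sketch (values made up): the auxiliary constructor builds an IN-list lookup,
// mapping each value to "true" so matching rows survive the enclosing filter.
//
//   new InExtractionFnSpec(List("CA", "NY"))
//   // == InExtractionFnSpec("lookup", LookUpMap("map", Map("CA" -> "true", "NY" -> "true")))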

/**
 * As defined in [[http://druid.io/docs/latest/querying/dimensionspecs.html]].
 */
sealed trait DimensionSpec {
  val `type`: String
  val dimension: String
  val outputName: String

  def sparkDataType(dDS : DruidDataSource) : DataType = StringType
}

/**
 * In SQL these are columns being grouped on.
 *
 * @param `type`
 * @param dimension
 * @param outputName
 */
case class DefaultDimensionSpec(val `type`: String, val dimension: String,
                                val outputName: String) extends DimensionSpec {
  def this(dimension : String,
           outputName: String) = this("default", dimension, outputName)
  def this(dimension : String) = this("default", dimension, dimension)
}
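
// Example (column name made up): the single-argument constructor reuses the
// dimension name as the output name.
//
//   new DefaultDimensionSpec("country")
//   // == DefaultDimensionSpec("default", "country", "country")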

case class ExtractionDimensionSpec(val `type`: String,
                                   val dimension: String,
                                   val outputName: String,
                                   extractionFn: ExtractionFunctionSpec) extends DimensionSpec {
  def this(dimension: String,
           outputName: String,
           extractionFn: ExtractionFunctionSpec) =
    this("extraction", dimension, outputName, extractionFn)
}
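
// Sketch (names made up): group on a derived value by pairing a dimension
// with an extraction function.
//
//   new ExtractionDimensionSpec("page", "pagePrefix",
//     RegexExtractionFunctionSpec("regex", "^/([^/]+)/"))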

sealed trait GranularitySpec {
  val `type`: String
}

case class DurationGranularitySpec(`type`: String, duration: Long) extends GranularitySpec

case class PeriodGranularitySpec(`type`: String, period: String,
                                 timeZone: Option[String],
                                 origin: Option[String]) extends GranularitySpec {
  def this(period: String) = this("period", period, None, None)
}
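
// Sketch: daily buckets via an ISO-8601 period string; timeZone and origin
// default to None in the auxiliary constructor.
//
//   new PeriodGranularitySpec("P1D")
//   // == PeriodGranularitySpec("period", "P1D", None, None)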

sealed trait FilterSpec {
  val `type`: String
}

case class SelectorFilterSpec(`type`: String,
                              dimension: String,
                              value: String) extends FilterSpec {
  def this(dimension: String,
           value: String) = this("selector", dimension, value)
}

case class RegexFilterSpec(`type`: String,
                           dimension: String,
                           pattern: String) extends FilterSpec

case class LogicalFilterSpec(`type`: String,
                             fields: List[FilterSpec]) extends FilterSpec

case class NotFilterSpec(`type`: String,
                         field: FilterSpec) extends FilterSpec

case class ExtractionFilterSpec(`type`: String,
                                dimension: String,
                                value: String,
                                extractionFn: InExtractionFnSpec) extends FilterSpec {
  def this(dimension: String, valList: List[String]) = this("extraction", dimension, "true",
    new InExtractionFnSpec(valList))
}
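
// Sketch (names made up): an IN filter, state IN ('CA', 'NY'); rows whose
// extracted lookup value is "true" pass the filter.
//
//   new ExtractionFilterSpec("state", List("CA", "NY"))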

/**
 * In SQL an invocation of the special JSPredicate function is translated to this FilterSpec.
 * JSPredicate has the signature jspredicate(column, jsFuncCodeAsString).
 *
 * @param `type`
 * @param dimension
 * @param function
 */
case class JavascriptFilterSpec(`type`: String,
                                dimension: String,
                                function: String) extends FilterSpec {
  def this(dimension: String,
           function: String) = this("javascript", dimension, function)

}

object JavascriptFilterSpec {

  def jsFn(compareOp : String, value : String) : String = {
     s"function(x) { return(x $compareOp '$value') }"
  }

  def create(dimension: String,
           compareOp : String,
           value : String) = new JavascriptFilterSpec(dimension, jsFn(compareOp, value))

}
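
// Sketch (dimension and value made up): `create` emits a single-comparison
// javascript filter.
//
//   JavascriptFilterSpec.create("age", ">=", "21").function
//   // == "function(x) { return(x >= '21') }"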

case class BoundFilterSpec(`type`: String,
                           dimension: String,
                           lower: Option[String],
                           lowerStrict : Option[Boolean],
                           upper: Option[String],
                           upperStrict : Option[Boolean],
                           alphaNumeric: Boolean) extends FilterSpec {

  def this(dimension: String,
           lower: Option[String],
           lowerStrict : Option[Boolean],
           upper: Option[String],
           upperStrict : Option[Boolean],
           alphaNumeric: Boolean) =
    this("bound", dimension, lower, lowerStrict, upper, upperStrict, alphaNumeric)
}
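
// Sketch (dimension made up): the half-open range 10 <= price < 20, compared
// alphanumerically.
//
//   new BoundFilterSpec("price", Some("10"), Some(false), Some("20"), Some(true), true)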

sealed trait AggregationSpec {
  val `type`: String
  val name : String

  // TODO: get rid of this method: eventually translation of sql infers DataType
  def sparkDataType(dDS : DruidDataSource) : DataType
}

/**
 * In SQL an aggregation expression on a metric is translated to this Spec.
 *
 * @param `type` can be "count", "longSum", "doubleSum", "min", "max", "hyperUnique"
 * @param name
 * @param fieldName
 */
case class FunctionAggregationSpec(val `type`: String,
                                   val name: String,
                                   val fieldName: String
                                    ) extends AggregationSpec {
  def sparkDataType(dDS : DruidDataSource) : DataType =
    dDS.metric(fieldName).map(c =>
      DruidDataType.sparkDataType(c.dataType)).getOrElse(
        throw new DruidDataSourceException(s"Unknown field $fieldName"))
}

/**
 * In SQL a count(distinct dimColumn) is translated to this Spec.
 *
 * @param `type`
 * @param name
 * @param fieldNames
 * @param byRow
 */
case class CardinalityAggregationSpec(val `type`: String,
                                      val name: String,
                                      val fieldNames: List[String],
                                      val byRow: Boolean
                                       ) extends AggregationSpec {
  def this(name: String,
           fieldNames: List[String]) = this("cardinality", name, fieldNames, true)

  def sparkDataType(dDS : DruidDataSource) : DataType = DoubleType
}
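
// Sketch (names made up): count(distinct user_id); the auxiliary constructor
// fixes byRow = true.
//
//   new CardinalityAggregationSpec("uniqUsers", List("user_id"))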

/**
 * In SQL this is an invocation of the special JSAgg function. Its signature is
 * JSAggLong(metricCol, aggFnCode, combineFnCode, resetFnCode); a similar
 * function, JSAggDouble, exists for double metrics.
 *
 * @param `type`
 * @param name
 * @param fieldNames
 * @param fnAggregate
 * @param fnCombine
 * @param fnReset
 */
case class JavascriptAggregationSpec(val `type`: String,
                                     val name: String,
                                     val fieldNames: List[String],
                                     val fnAggregate: String,
                                     val fnCombine: String,
                                     val fnReset: String
                                      ) extends AggregationSpec {

  // for now assuming it is always Double
  def sparkDataType(dDS : DruidDataSource) : DataType = DoubleType
}

/**
 * In SQL this is an aggregation on an if condition, for example
 * sum(if dimCol = value then metric else null end).
 *
 * @param `type`
 * @param filter
 * @param aggregator
 */
case class FilteredAggregationSpec(val `type`: String,
                                   val name: String,
                                   val filter: SelectorFilterSpec,
                                   val aggregator: AggregationSpec
                                    ) extends AggregationSpec {

  def sparkDataType(dDS : DruidDataSource) : DataType = aggregator.sparkDataType(dDS)
}
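
// Sketch (names made up): sum(if country = 'US' then revenue else null end).
//
//   FilteredAggregationSpec("filtered", "usRevenue",
//     new SelectorFilterSpec("country", "US"),
//     FunctionAggregationSpec("doubleSum", "usRevenue", "revenue"))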

sealed trait PostAggregationSpec {
  val `type`: String
  val name: String

  // TODO: get rid of this method: eventually translation of sql infers DataType
  // for now set them to DoubleType
  def sparkDataType(dDS : DruidDataSource) : DataType = DoubleType
}

case class FieldAccessPostAggregationSpec(val `type`: String,
                                          val fieldName: String
                                           ) extends PostAggregationSpec {
  def this(fieldName: String) = this("fieldAccess", fieldName)
  override val name = fieldName
}

case class ConstantPostAggregationSpec(val `type`: String,
                                       val name: String,
                                       val value: Double
                                        ) extends PostAggregationSpec

case class HyperUniqueCardinalityPostAggregationSpec(val `type`: String,
                                                     val name: String,
                                                     val fieldName: String
                                                      ) extends PostAggregationSpec

/**
 * In SQL this is an expression involving at least one aggregation expression.
 *
 * @param `type`
 * @param name
 * @param fn can be +, -, *, /
 * @param fields
 * @param ordering used if the ordering is on the post-aggregation expression.
 */
case class ArithmeticPostAggregationSpec(val `type`: String,
                                         val name: String,
                                         val fn: String,
                                         val fields: List[PostAggregationSpec],
                                         val ordering: Option[String]
                                          ) extends PostAggregationSpec {
  def this(name: String,
  fn: String,
  fields: List[PostAggregationSpec],
  ordering: Option[String]) = this("arithmetic", name, fn, fields, ordering)
}
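
// Sketch (aggregator names made up): avg = total / cnt as an arithmetic
// post-aggregation over two field accesses.
//
//   new ArithmeticPostAggregationSpec("avg", "/",
//     List(new FieldAccessPostAggregationSpec("total"),
//          new FieldAccessPostAggregationSpec("cnt")),
//     None)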

case class JavascriptPostAggregationSpec(val `type`: String,
                                         val name: String,
                                         val fields: List[PostAggregationSpec],
                                         val function: String
                                          ) extends PostAggregationSpec

/**
 *
 * @param dimension
 * @param direction can be "ascending"|"descending"
 */
case class OrderByColumnSpec(val dimension: String,
                             val direction: String) {

  def this(dimension: String, asc : Boolean) =
    this(dimension, if (asc) "ascending" else "descending")

  def this(dimension: String) =
    this(dimension, "ascending")
}

case class LimitSpec(val `type`: String,
                     val limit: Int,
                     val columns: List[OrderByColumnSpec]) {

  def this(limit: Int,
           columns: List[OrderByColumnSpec]) = this("default", limit, columns)

  def this(limit: Int,
           columns: OrderByColumnSpec*) = this("default", limit, columns.toList)
}
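
// Sketch (column name made up): the top 10 rows by descending "cnt", using
// the varargs auxiliary constructor.
//
//   new LimitSpec(10, new OrderByColumnSpec("cnt", false))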

sealed trait HavingSpec {
  val `type`: String
}

/**
 *
 * @param `type` is greaterThan, equalTo, lessThan
 * @param aggregation
 * @param value
 */
case class ComparisonHavingSpec(val `type`: String,
                                val aggregation: String,
                                val value: Double) extends HavingSpec

case class LogicalBinaryOpHavingSpec(val `type`: String,
                                     val havingSpecs: List[HavingSpec]) extends HavingSpec

case class NotOpHavingSpec(val `type`: String,
                           val havingSpec: HavingSpec) extends HavingSpec

sealed trait TopNMetricSpec {
  val `type`: String
}

case class NumericTopNMetricSpec(
                                  val `type`: String,
                                  val metric: String
                                  ) extends TopNMetricSpec

case class LexiographicTopNMetricSpec(
                                       val `type`: String,
                                       val previousStop: String
                                       ) extends TopNMetricSpec

case class AlphaNumericTopNMetricSpec(
                                       val `type`: String,
                                       val previousStop: String
                                       ) extends TopNMetricSpec

case class InvertedTopNMetricSpec(
                                   val `type`: String,
                                   val metric: TopNMetricSpec
                                   ) extends TopNMetricSpec
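
// Sketch (metric name made up): a bottom-N ordering, expressed by inverting a
// numeric metric spec.
//
//   InvertedTopNMetricSpec("inverted", NumericTopNMetricSpec("numeric", "cnt"))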


case class SegmentInterval(itvl : String,
                           ver : String,
                           part : Option[Int]
                          )

case class SegmentIntervals(`type` : String,
                            segments : List[SegmentInterval]
                           ) {
  def this(segInAssignments: List[(DruidSegmentInfo, Interval)]) = {
    this("segments", segInAssignments.map {
      case (segInfo, in) =>
        val itvl: String = in.toString
        val ver: String = segInfo.version
        val part: Option[Int] = segInfo.shardSpec.flatMap(_.partitionNum)
        SegmentInterval(itvl, ver, part)
    }
    )
  }
}

object SegmentIntervals {

  def segmentIntervals(segInAssignments: List[DruidSegmentInfo]) : SegmentIntervals = {
    SegmentIntervals("segments", segInAssignments.map {
      case segInfo =>
        val itvl: String = segInfo.interval
        val ver: String = segInfo.version
        val part: Option[Int] = segInfo.shardSpec.flatMap(_.partitionNum)
        SegmentInterval(itvl, ver, part)
    }
    )
  }
}

trait SearchQueryQuerySpec {
  self : Product =>
}

case class InsensitiveContainsSearchQuerySpec (
                                                `type` : String,
                                                value : String
                                              ) extends SearchQueryQuerySpec {
  def this() = this("insensitive_contains", "")
}
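
// Sketch (search string made up): a case-insensitive substring match; the
// no-arg constructor leaves the value empty.
//
//   InsensitiveContainsSearchQuerySpec("insensitive_contains", "wiki")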

case class SortSearchQuerySpec(
                                `type` : String
                              )

// TODO: look into exposing ContextSpec
sealed trait QuerySpec {
  self : Product =>
  val queryType: String
  val dataSource: String

  def intervalList: List[String]
  def setIntervals(ins : List[Interval]) : QuerySpec
  def setSegIntervals(segIns : List[(DruidSegmentInfo, Interval)]) : QuerySpec

  def dimensions : List[DimensionSpec] = Nil
  def aggregations : List[AggregationSpec] = Nil
  def postAggregations : Option[List[PostAggregationSpec]] = None

  def filter : Option[FilterSpec]

  def setFilter(fSpec : FilterSpec) : QuerySpec

  def apply(useSmile : Boolean,
            is : InputStream,
            onDone : => Unit = (),
            fromList : Boolean = false) : CloseableIterator[QueryResultRow] =
    DruidQueryResultIterator(useSmile, is, onDone, fromList)
}

case class GroupByQuerySpec(
                             val queryType: String,
                             val dataSource: String,
                             override val dimensions: List[DimensionSpec],
                             val limitSpec: Option[LimitSpec],
                             val having: Option[HavingSpec],
                             val granularity: Either[String,GranularitySpec],
                             val filter: Option[FilterSpec],
                             override val aggregations: List[AggregationSpec],
                             override val postAggregations: Option[List[PostAggregationSpec]],
                             val intervals: List[String]
                             ) extends QuerySpec {
  def this(dataSource: String,
           dimensions: List[DimensionSpec],
           limitSpec: Option[LimitSpec],
           having: Option[HavingSpec],
           granularity: Either[String,GranularitySpec],
           filter: Option[FilterSpec],
           aggregations: List[AggregationSpec],
           postAggregations: Option[List[PostAggregationSpec]],
           intervals: List[String]) = this("groupBy",
    dataSource, dimensions, limitSpec, having, granularity, filter,
    aggregations, postAggregations, intervals)

  def setIntervals(ins : List[Interval]) = this.copy(intervals = ins.map(_.toString))
  def intervalList: List[String] = intervals

  def setSegIntervals(segIns : List[(DruidSegmentInfo, Interval)]) : QuerySpec =
    GroupByQuerySpecWithSegIntervals(
      queryType,
      dataSource,
      dimensions,
      limitSpec,
      having,
      granularity,
      filter,
      aggregations,
      postAggregations,
      null // placeholder; replaced immediately by setSegIntervals below
    ).setSegIntervals(segIns)

  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))
}
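
// Sketch of a full groupBy spec (datasource, column, and interval made up):
// count of rows per country per day.
//
//   new GroupByQuerySpec("wikipedia",
//     List(new DefaultDimensionSpec("country")),
//     None, None,
//     Left("day"),
//     None,
//     List(FunctionAggregationSpec("count", "cnt", "count")),
//     None,
//     List("2016-01-01/2016-02-01"))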

case class GroupByQuerySpecWithSegIntervals(
                             val queryType: String,
                             val dataSource: String,
                             override val dimensions: List[DimensionSpec],
                             val limitSpec: Option[LimitSpec],
                             val having: Option[HavingSpec],
                             val granularity: Either[String,GranularitySpec],
                             val filter: Option[FilterSpec],
                             override val aggregations: List[AggregationSpec],
                             override val postAggregations: Option[List[PostAggregationSpec]],
                             val intervals: SegmentIntervals
                           ) extends QuerySpec {

  override def intervalList: List[String] = intervals.segments.map(_.itvl)

  override def setSegIntervals(segInAssignments: List[(DruidSegmentInfo, Interval)]): QuerySpec = {
    this.copy(intervals = new SegmentIntervals(segInAssignments))
  }

  // plain interval lists are not supported once segment intervals are assigned
  override def setIntervals(ins: List[Interval]): QuerySpec = ???
  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))
}

case class TimeSeriesQuerySpec(
                                val queryType: String,
                                val dataSource: String,
                                val intervals: List[String],
                                val granularity: Either[String,GranularitySpec],
                                val filter: Option[FilterSpec],
                                override val aggregations: List[AggregationSpec],
                                override val postAggregations: Option[List[PostAggregationSpec]]
                                ) extends QuerySpec {
  def this(dataSource: String,
           intervals: List[String],
           granularity: Either[String,GranularitySpec],
           filters: Option[FilterSpec],
           aggregations: List[AggregationSpec],
           postAggregations: Option[List[PostAggregationSpec]]) = this("timeseries",
    dataSource, intervals, granularity, filters, aggregations, postAggregations)

  def setIntervals(ins : List[Interval]) = this.copy(intervals = ins.map(_.toString))
  def intervalList: List[String] = intervals

  def setSegIntervals(segIns : List[(DruidSegmentInfo, Interval)]) : QuerySpec =
    TimeSeriesQuerySpecWithSegIntervals(
      queryType,
      dataSource,
      granularity,
      filter,
      aggregations,
      postAggregations,
      null // placeholder; replaced immediately by setSegIntervals below
    ).setSegIntervals(segIns)

  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))
}

case class TimeSeriesQuerySpecWithSegIntervals(
                                             val queryType: String,
                                             val dataSource: String,
                                             val granularity: Either[String,GranularitySpec],
                                             val filter: Option[FilterSpec],
                                             override val aggregations: List[AggregationSpec],
                                             override val postAggregations:
                                             Option[List[PostAggregationSpec]],
                                             val intervals: SegmentIntervals
                                           ) extends QuerySpec {

  override def intervalList: List[String] = intervals.segments.map(_.itvl)

  override def setSegIntervals(segInAssignments: List[(DruidSegmentInfo, Interval)]): QuerySpec = {
    this.copy(intervals = new SegmentIntervals(segInAssignments))
  }

  // plain interval lists are not supported once segment intervals are assigned
  override def setIntervals(ins: List[Interval]): QuerySpec = ???
  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))
}

case class TopNQuerySpec(
                          val queryType: String,
                          val dataSource: String,
                          val intervals: List[String],
                          val granularity: Either[String,GranularitySpec],
                          val filter: Option[FilterSpec],
                          override val aggregations: List[AggregationSpec],
                          override val postAggregations: Option[List[PostAggregationSpec]],
                          val dimension: DimensionSpec,
                          val threshold: Int,
                          val metric: TopNMetricSpec
                          ) extends QuerySpec {
  def this(dataSource: String,
           intervals: List[String],
           granularity: Either[String,GranularitySpec],
           filter: Option[FilterSpec],
           aggregations: List[AggregationSpec],
           postAggregations: Option[List[PostAggregationSpec]],
           dimension: DimensionSpec,
           threshold: Int,
           metric: TopNMetricSpec) = this("topN", dataSource,
    intervals, granularity, filter, aggregations,
    postAggregations, dimension, threshold, metric)

  def setIntervals(ins : List[Interval]) = this.copy(intervals = ins.map(_.toString))
  def intervalList: List[String] = intervals

  // pushing segment-interval assignments is not supported for topN queries
  def setSegIntervals(segIns : List[(DruidSegmentInfo, Interval)]) : QuerySpec = ???
  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))

  override def dimensions : List[DimensionSpec] =
    List(dimension)
}
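
// Sketch (all names made up): the top 5 countries by "cnt" over one interval.
//
//   new TopNQuerySpec("wikipedia", List("2016-01-01/2016-02-01"), Left("all"),
//     None, List(FunctionAggregationSpec("count", "cnt", "count")), None,
//     new DefaultDimensionSpec("country"), 5, NumericTopNMetricSpec("numeric", "cnt"))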

case class SearchQuerySpec(
                            val queryType: String,
                            val dataSource: String,
                            val intervals: List[String],
                            val granularity: Either[String,GranularitySpec],
                            val filter: Option[FilterSpec],
                            val searchDimensions : List[String],
                            val query : SearchQueryQuerySpec,
                            val limit : Int,
                            val sort : Option[SortSearchQuerySpec]
                          ) extends QuerySpec {

  def this(dataSource: String,
           intervals: List[String],
           granularity: Either[String,GranularitySpec],
           filter: Option[FilterSpec],
           searchDimensions : List[String],
           query : SearchQueryQuerySpec,
           limit : Int,
           sort : Option[SortSearchQuerySpec] = None
  ) = this("search", dataSource, intervals, granularity,
    filter, searchDimensions, query, limit, sort)

  override def intervalList: List[String] = intervals

  override def setSegIntervals(segIns: List[(DruidSegmentInfo, Interval)]): QuerySpec =
    SearchQuerySpecWithSegIntervals(
      queryType,
      dataSource,
      null, // placeholder; replaced immediately by setSegIntervals below
      granularity,
      filter,
      searchDimensions,
      query,
      limit,
      sort
    ).setSegIntervals(segIns)

  override def setIntervals(ins: List[Interval]): QuerySpec =
    this.copy(intervals = ins.map(_.toString))

  override def setFilter(fSpec: FilterSpec): QuerySpec = this.copy(filter = Some(fSpec))

  override def apply(useSmile : Boolean,
                     is : InputStream,
                     onDone : => Unit = (),
                     fromList : Boolean = false) : CloseableIterator[QueryResultRow] =
    new SearchQueryResultIterator(useSmile, is, onDone)


  override def dimensions : List[DimensionSpec] =
    searchDimensions.map(d => new DefaultDimensionSpec(d, d))
}

case class SearchQuerySpecWithSegIntervals(
                            val queryType: String,
                            val dataSource: String,
                            val intervals: SegmentIntervals,
                            val granularity: Either[String,GranularitySpec],
                            val filter: Option[FilterSpec],
                            val searchDimensions : List[String],
                            val query : SearchQueryQuerySpec,
                            val limit : Int,
                            val sort : Option[SortSearchQuerySpec]
                          ) extends QuerySpec {

  def this(dataSource: String,
           intervals: SegmentIntervals,
           granularity: Either[String,GranularitySpec],
           filter: Option[FilterSpec],
           searchDimensions : List[String],
           query : SearchQueryQuerySpec,
           limit : Int,
           sort : Option[SortSearchQuerySpec] = None
          ) =
    this("search", dataSource, intervals, granularity, filter, searchDimensions, query, limit, sort)

  override def intervalList: List[String] = intervals.segments.map(_.itvl)

  override def setSegIntervals(segInAssignments: List[(DruidSegmentInfo, Interval)]): QuerySpec = {
    this.copy(intervals = new SegmentIntervals(segInAssignments))
  }

  // plain interval lists are not supported once segment intervals are assigned
  override def setIntervals(ins: List[Interval]): QuerySpec = ???

  override def setFilter(fSpec: FilterSpec): QuerySpec = this.copy(filter = Some(fSpec))

  override def apply(useSmile : Boolean,
                     is : InputStream,
                     onDone : => Unit = (),
                     fromList : Boolean = false) : CloseableIterator[QueryResultRow] =
    new SearchQueryResultIterator(useSmile, is, onDone)

  override def dimensions : List[DimensionSpec] =
    searchDimensions.map(d => new DefaultDimensionSpec(d, d))
}



