/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.sparklinedata.druid

import java.io.InputStream
import java.util.Locale

import org.apache.spark.sql.sources.druid._

import scala.collection.breakOut
import org.apache.spark.sql.types._
import org.joda.time.Interval
import org.sparklinedata.druid.client._
import org.sparklinedata.druid.metadata.{DruidDataSource, DruidDataType, DruidRelationInfo, DruidSegmentInfo}

sealed trait ExtractionFunctionSpec {
  val `type`: String
}

/**
 * In SQL this is a 'like' or 'rlike' predicate on a column being grouped on.
 *
 * @param `type`
 * @param expr
 */
case class RegexExtractionFunctionSpec(val `type`: String, val expr: String)
  extends ExtractionFunctionSpec
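
// Illustrative usage (a sketch, not part of the original source; "page" is a hypothetical
// dimension name): a grouped column constrained with `rlike '[0-9]+'` can be expressed by
// wrapping a regex extraction spec in an ExtractionDimensionSpec (defined below), assuming
// the Druid type string "regex":
//
//   val regexFn = RegexExtractionFunctionSpec("regex", "[0-9]+")
//   val regexDim = new ExtractionDimensionSpec("page", "page", regexFn)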

/**
 * In SQL this is a grouping expression of the form 'if col rlike regex then regex else null'.
 *
 * @param `type`
 * @param expr
 */
case class PartialExtractionFunctionSpec(val `type`: String, val expr: String)
  extends ExtractionFunctionSpec

/**
 * In SQL this is a 'contains' predicate on a column being grouped on.
 *
 * @param `type`
 * @param query
 */
case class SearchQueryExtractionFunctionSpec(val `type`: String, val query: String)
  extends ExtractionFunctionSpec

/**
 * In SQL these are withTimeZone and field-extraction functions applied to the time
 * dimension column. Time functions are assumed to be expressed using the
 * [[https://github.com/SparklineData/spark-datetime SparklineData spark-datetime package]].
 *
 * @param `type`
 * @param format
 * @param timeZone
 * @param locale
 */
case class TimeFormatExtractionFunctionSpec(val `type`: String,
                                            val format: String,
                                            val timeZone: Option[String],
                                            val locale: Option[String])
  extends ExtractionFunctionSpec {
  def this(format: String,
           timeZone: Option[String]) =
    this("timeFormat", format, timeZone, Some(Locale.getDefault.toString))
}
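
// Illustrative usage (a sketch; "__time" and "year" are hypothetical names): extracting the
// year from the time dimension in UTC via the auxiliary constructor, which fills in the
// "timeFormat" type and the default JVM locale:
//
//   val yearFn = new TimeFormatExtractionFunctionSpec("yyyy", Some("UTC"))
//   val yearDim = new ExtractionDimensionSpec("__time", "year", yearFn)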

case class TimeParsingExtractionFunctionSpec(val `type`: String,
                                             val timeFormat: String,
                                             val resultFormat: String)
  extends ExtractionFunctionSpec {
  def this(timeFormat: String, resultFormat: String) =
    this("time", timeFormat, resultFormat)
}

case class JavaScriptExtractionFunctionSpec(val `type`: String,
                                            val `function`: String,
                                            val injective: Boolean = false)
  extends ExtractionFunctionSpec {
  def this(fn: String) = this("javascript", fn)
}

case class LookUpMap(val `type`: String, val `map`: Map[String, String])

case class InExtractionFnSpec(val `type`: String, lookup: LookUpMap)
  extends ExtractionFunctionSpec {
  def this(valLst: List[String]) = this("lookup",
    LookUpMap("map", valLst.map(x => (x, "true"))(breakOut)))
}
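
// Illustrative usage (a sketch): a SQL `dimCol IN ('a', 'b')` predicate on a grouped column;
// the auxiliary constructor maps each listed value to "true" in the lookup map:
//
//   val inFn = new InExtractionFnSpec(List("a", "b"))
//   // inFn.lookup.`map` == Map("a" -> "true", "b" -> "true")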

/**
 * As defined in [[http://druid.io/docs/latest/querying/dimensionspecs.html]].
 */
sealed trait DimensionSpec {
  val `type`: String
  val dimension: String
  val outputName: String

  def sparkDataType(dDS : DruidDataSource) : DataType = StringType
}

/**
 * In SQL these are columns being grouped on.
 *
 * @param `type`
 * @param dimension
 * @param outputName
 */
case class DefaultDimensionSpec(val `type`: String, val dimension: String,
                                val outputName: String) extends DimensionSpec {
  def this(dimension : String,
           outputName: String) = this("default", dimension, outputName)
  def this(dimension : String) = this("default", dimension, dimension)
}
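
// Illustrative usage (a sketch; "country" is a hypothetical column): a plain
// `GROUP BY country` needs no extraction function, and the output name defaults to the
// dimension name:
//
//   val countryDim = new DefaultDimensionSpec("country")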

case class ExtractionDimensionSpec(val `type`: String,
                                   val dimension: String,
                                   val outputName: String,
                                   extractionFn: ExtractionFunctionSpec) extends DimensionSpec {
  def this(dimension: String,
           outputName: String,
           extractionFn: ExtractionFunctionSpec) =
    this("extraction", dimension, outputName, extractionFn)
}

sealed trait GranularitySpec {
  val `type`: String
}

case class DurationGranularitySpec(`type`: String, duration: Long) extends GranularitySpec

case class PeriodGranularitySpec(`type`: String, period: String,
                                 timeZone: Option[String],
                                 origin: Option[String]) extends GranularitySpec {
  def this(period: String) = this("period", period, None, None)
}

sealed trait FilterSpec {
  val `type`: String
}

case class SelectorFilterSpec(`type`: String,
                              dimension: String,
                              value: String) extends FilterSpec {
  def this(dimension: String,
           value: String) = this("selector", dimension, value)
}

case class RegexFilterSpec(`type`: String,
                           dimension: String,
                           pattern: String) extends FilterSpec {
  def this(dimension: String,
           pattern: String) = this("regex", dimension, pattern)
}

case class StringContainsSpec(`type`: String,
                              dimension: String,
                              value: String) extends FilterSpec {
  def this(dimension: String,
           value: String) = this("contains", dimension, value)
}

case class LogicalFilterSpec(`type`: String,
                             fields: List[FilterSpec]) extends FilterSpec {

  def flatten : LogicalFilterSpec = {
    LogicalFilterSpec(`type`,
      fields.flatMap {
        case ac@LogicalFilterSpec(t, c) if t == `type` => ac.flatten.fields
        case c => Seq(c)
      }
    )
  }

}
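
// Illustrative behavior (a sketch; column names are hypothetical): `flatten` collapses
// nested filters of the same logical type, so and(and(f1, f2), f3) becomes and(f1, f2, f3):
//
//   val f1 = new SelectorFilterSpec("c1", "v1")
//   val f2 = new SelectorFilterSpec("c2", "v2")
//   val f3 = new SelectorFilterSpec("c3", "v3")
//   val nested = LogicalFilterSpec("and", List(LogicalFilterSpec("and", List(f1, f2)), f3))
//   // nested.flatten.fields == List(f1, f2, f3)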

case class NotFilterSpec(`type`: String,
                         field: FilterSpec) extends FilterSpec

case class ExtractionFilterSpec(`type`: String,
                                dimension: String,
                                value: String,
                                extractionFn: InExtractionFnSpec) extends FilterSpec {
  def this(dimension: String, valList: List[String]) = this("extraction", dimension, "true",
    new InExtractionFnSpec(valList))
}

/**
 * In SQL an invocation of the special JSPredicate function is translated to this FilterSpec.
 * JSPredicate has the signature jspredicate(column, jsFuncCodeAsString).
 *
 * @param `type`
 * @param dimension
 * @param function
 */
case class JavascriptFilterSpec(`type`: String,
                                dimension: String,
                                function: String) extends FilterSpec {
  def this(dimension: String,
           function: String) = this("javascript", dimension, function)

}

object JavascriptFilterSpec {

  def jsFn(compareOp : String, value : String) : String = {
     s"function(x) { return(x $compareOp '$value') }"
  }

  def create(dimension: String,
           compareOp : String,
           value : String) = new JavascriptFilterSpec(dimension, jsFn(compareOp, value))

}
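
// Illustrative usage (a sketch; "dimCol" is a hypothetical column): a predicate such as
// `dimCol > '10'` pushed down as a javascript filter:
//
//   val jsFilter = JavascriptFilterSpec.create("dimCol", ">", "10")
//   // jsFilter.function == "function(x) { return(x > '10') }"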

case class BoundFilterSpec(`type`: String,
                           dimension: String,
                           lower: Option[String],
                           lowerStrict : Option[Boolean],
                           upper: Option[String],
                           upperStrict : Option[Boolean],
                           alphaNumeric: Boolean) extends FilterSpec {

  def this(dimension: String,
           lower: Option[String],
           lowerStrict : Option[Boolean],
           upper: Option[String],
           upperStrict : Option[Boolean],
           alphaNumeric: Boolean) =
    this("bound", dimension, lower, lowerStrict, upper, upperStrict, alphaNumeric)
}

case class RectangularBound(minCoords : Array[Double],
                            maxCoords : Array[Double],
                           `type` : String = "rectangular") {

  // Intersect two rectangular bounds: take the max of the min coordinates and the
  // min of the max coordinates along each axis.
  def combine(other: RectangularBound): RectangularBound = {
    RectangularBound(
      minCoords.zip(other.minCoords).map(t => Math.max(t._1, t._2)),
      maxCoords.zip(other.maxCoords).map(t => Math.min(t._1, t._2))
    )
  }

}

case class SpatialFilterSpec(
                            dimension : String,
                            bound : RectangularBound,
                            `type` : String = "spatial"
                            ) extends FilterSpec {

  def combine(other : SpatialFilterSpec) : SpatialFilterSpec = {
    assert(other.dimension == dimension)
    SpatialFilterSpec(dimension, bound.combine(other.bound))
  }

}

sealed trait AggregationSpec {
  val `type`: String
  val name : String

  // TODO: get rid of this method: eventually translation of sql infers DataType
  def sparkDataType(dDS : DruidDataSource) : DataType
}

/**
 * In SQL an aggregation expression on a metric is translated to this Spec.
 *
 * @param `type` can be "count", "longSum", "doubleSum", "min", "max", "hyperUnique"
 * @param name
 * @param fieldName
 */
case class FunctionAggregationSpec(val `type`: String,
                                   val name: String,
                                   val fieldName: String
                                    ) extends AggregationSpec {
  def sparkDataType(dDS : DruidDataSource) : DataType =
    dDS.metric(fieldName).map(c =>
      DruidDataType.sparkDataType(c.dataType)).getOrElse(
        throw new DruidDataSourceException(s"Unknown field $fieldName"))
}
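
// Illustrative usage (a sketch; names are hypothetical): `sum(clicks)` on a long metric maps
// to a "longSum" aggregator, and sparkDataType resolves the result type from the Druid
// data source's metric metadata:
//
//   val sumAgg = FunctionAggregationSpec("longSum", "alias_0", "clicks")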

/**
 * In SQL a count(distinct dimColumn) is translated to this Spec.
 *
 * @param `type`
 * @param name
 * @param fieldNames
 * @param byRow
 */
case class CardinalityAggregationSpec(val `type`: String,
                                      val name: String,
                                      val fieldNames: List[String],
                                      val byRow: Boolean
                                       ) extends AggregationSpec {
  def this(name: String,
           fieldNames: List[String]) = this("cardinality", name, fieldNames, true)

  def sparkDataType(dDS : DruidDataSource) : DataType = DoubleType
}
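
// Illustrative usage (a sketch; names are hypothetical): `count(distinct dimCol)` maps to a
// cardinality aggregator; the auxiliary constructor sets byRow = true:
//
//   val distinctAgg = new CardinalityAggregationSpec("alias_1", List("dimCol"))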

case class HyperUniqueAggregationSpec(val `type`: String,
                                      val name: String,
                                      val fieldName : String) extends AggregationSpec {

  def this(name: String,
           fieldName: String) = this("hyperUnique", name, fieldName)

  def sparkDataType(dDS : DruidDataSource) : DataType = DoubleType

}

/**
 * In SQL this is an invocation of the special JSAgg function. Its signature is
 * JSAggLong(metricCol, aggFnCode, combineFnCode, resetFnCode). A similar function,
 * JSAggDouble, exists for double metrics.
 *
 * @param `type`
 * @param name
 * @param fieldNames
 * @param fnAggregate
 * @param fnCombine
 * @param fnReset
 */
case class JavascriptAggregationSpec(val `type`: String,
                                     val name: String,
                                     val fieldNames: List[String],
                                     val fnAggregate: String,
                                     val fnCombine: String,
                                     val fnReset: String
                                      ) extends AggregationSpec {

  // for now assuming it is always Double
  def sparkDataType(dDS : DruidDataSource) : DataType = DoubleType
}

/**
 * In SQL this is an aggregation on an if condition, for example
 * sum(if dimCol = value then metric else null end).
 *
 * @param `type`
 * @param name
 * @param filter
 * @param aggregator
 */
case class FilteredAggregationSpec(val `type`: String,
                                   val name: String,
                                   val filter: SelectorFilterSpec,
                                   val aggregator: AggregationSpec
                                    ) extends AggregationSpec {

  def sparkDataType(dDS : DruidDataSource) : DataType = aggregator.sparkDataType(dDS)
}
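
// Illustrative usage (a sketch; names are hypothetical):
// `sum(if dimCol = 'v' then metric else null end)` pairs a selector filter with the
// underlying sum aggregator, assuming the Druid type string "filtered":
//
//   val condSum = FilteredAggregationSpec("filtered", "alias_2",
//     new SelectorFilterSpec("dimCol", "v"),
//     FunctionAggregationSpec("doubleSum", "alias_2", "metric"))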

sealed trait PostAggregationSpec {
  val `type`: String
  val name: String

  // TODO: get rid of this method: eventually translation of sql infers DataType
  // for now set them to DoubleType
  def sparkDataType(dDS : DruidDataSource) : DataType = DoubleType
}

case class FieldAccessPostAggregationSpec(val `type`: String,
                                          val fieldName: String
                                           ) extends PostAggregationSpec {
  def this(fieldName: String) = this("fieldAccess", fieldName)
  override val name = fieldName
}

case class ConstantPostAggregationSpec(val `type`: String,
                                       val name: String,
                                       val value: Double
                                        ) extends PostAggregationSpec

case class HyperUniqueCardinalityPostAggregationSpec(val `type`: String,
                                                     val name: String,
                                                     val fieldName: String
                                                      ) extends PostAggregationSpec

/**
 * In SQL this is an expression involving at least one aggregation expression.
 *
 * @param `type`
 * @param name
 * @param fn can be +, -, *, /
 * @param fields
 * @param ordering used if the ordering is on the post-aggregation expression.
 */
case class ArithmeticPostAggregationSpec(val `type`: String,
                                         val name: String,
                                         val fn: String,
                                         val fields: List[PostAggregationSpec],
                                         val ordering: Option[String]
                                          ) extends PostAggregationSpec {
  def this(name: String,
           fn: String,
           fields: List[PostAggregationSpec],
           ordering: Option[String]) = this("arithmetic", name, fn, fields, ordering)
}
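
// Illustrative usage (a sketch; "sum_a" and "sum_b" are hypothetical aggregator names):
// `sum(a) / sum(b)` becomes an arithmetic post-aggregation over field accesses on the
// already-computed aggregates:
//
//   val ratio = new ArithmeticPostAggregationSpec("alias_3", "/",
//     List(new FieldAccessPostAggregationSpec("sum_a"),
//          new FieldAccessPostAggregationSpec("sum_b")),
//     None)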

case class JavascriptPostAggregationSpec(val `type`: String,
                                         val name: String,
                                         val fields: List[PostAggregationSpec],
                                         val function: String
                                          ) extends PostAggregationSpec

/**
 *
 * @param dimension
 * @param direction can be "ascending"|"descending"
 */
case class OrderByColumnSpec(val dimension: String,
                             val direction: String) {

  def this(dimension: String, asc : Boolean) =
    this(dimension, if (asc) "ascending" else "descending")

  def this(dimension: String) =
    this(dimension, "ascending")
}

case class LimitSpec(val `type`: String,
                     val limit: Int,
                     val columns: List[OrderByColumnSpec]) {

  def this(limit: Int,
           columns: List[OrderByColumnSpec]) = this("default", limit, columns)

  def this(limit: Int,
           columns: OrderByColumnSpec*) = this("default", limit, columns.toList)
}
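
// Illustrative usage (a sketch; "country" is a hypothetical column): `ORDER BY country
// LIMIT 10` maps to a "default" limit spec with a single ascending column:
//
//   val limit = new LimitSpec(10, new OrderByColumnSpec("country"))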

sealed trait HavingSpec {
  val `type`: String
}

/**
 *
 * @param `type` is greaterThan, equalTo, lessThan
 * @param aggregation
 * @param value
 */
case class ComparisonHavingSpec(val `type`: String,
                                val aggregation: String,
                                val value: Double) extends HavingSpec

case class LogicalBinaryOpHavingSpec(val `type`: String,
                                     val havingSpecs: List[HavingSpec]) extends HavingSpec

case class NotOpHavingSpec(val `type`: String,
                           val havingSpec: HavingSpec) extends HavingSpec

sealed trait TopNMetricSpec {
  val `type`: String
}

case class NumericTopNMetricSpec(
                                  val `type`: String,
                                  val metric: String
                                  ) extends TopNMetricSpec {
  def this(metric: String) = this("numeric", metric)
}

case class LexiographicTopNMetricSpec(
                                       val `type`: String,
                                       val previousStop: String
                                       ) extends TopNMetricSpec {
  def this(previousStop: String) = this("lexicographic", previousStop)
}

case class AlphaNumericTopNMetricSpec(
                                       val `type`: String,
                                       val previousStop: String
                                       ) extends TopNMetricSpec

case class InvertedTopNMetricSpec(
                                   val `type`: String,
                                   val metric: TopNMetricSpec
                                   ) extends TopNMetricSpec {
  def this(metric: TopNMetricSpec) = this("inverted", metric)
}


case class SegmentInterval(itvl : String,
                           ver : String,
                           part : Option[Int]
                          )

case class SegmentIntervals(`type` : String,
                            segments : List[SegmentInterval]
                           ) {
  def this(segInAssignments: List[(DruidSegmentInfo, Interval)]) = {
    this("segments", segInAssignments.map {
      case (segInfo, in) =>
        val itvl: String = in.toString
        val ver: String = segInfo.version
        val part: Option[Int] = segInfo.shardSpec.flatMap(_.partitionNum)
        SegmentInterval(itvl, ver, part)
    }
    )
  }
}

object SegmentIntervals {

  def segmentIntervals(segInAssignments: List[DruidSegmentInfo]) : SegmentIntervals = {
    SegmentIntervals("segments", segInAssignments.map {
      case segInfo =>
        val itvl: String = segInfo.interval
        val ver: String = segInfo.version
        val part: Option[Int] = segInfo.shardSpec.flatMap(_.partitionNum)
        SegmentInterval(itvl, ver, part)
    }
    )
  }
}

trait SearchQueryQuerySpec {
  self : Product =>
}

case class InsensitiveContainsSearchQuerySpec (
                                                `type` : String,
                                                value : String
                                              ) extends SearchQueryQuerySpec {
  def this() = this("insensitive_contains", "")
}

case class SortSearchQuerySpec(
                                `type` : String
                              )

case class QuerySpecContext(
                             var queryId : String,
                             timeout : Option[Long] = None,
                             priority : Option[Int] = None,
                             useCache : Option[Boolean] = None,
                             populateCache : Option[Boolean] = None,
                             bySegment : Option[Boolean] = None,
                             chunkPeriod : Option[String] = None,
                             minTopNThreshold : Option[Int] = None,
                             maxResults : Option[Int] = None,
                             maxIntermediateRows : Option[Int] = None,
                             groupByIsSingleThreaded : Option[Boolean] = None,
                             var groupByStrategy : Option[String] = None
                           )

sealed trait QuerySpec {
  self : Product =>
  val queryType: String
  val dataSource: String

  def intervalList: List[String]
  def setIntervals(ins : List[Interval]) : QuerySpec
  def setSegIntervals(segIns : List[(DruidSegmentInfo, Interval)]) : QuerySpec

  def dimensionSpecs : List[DimensionSpec] = Nil

  def filter : Option[FilterSpec]

  def setFilter(fSpec : FilterSpec) : QuerySpec

  def context : Option[QuerySpecContext]

  def apply(useSmile : Boolean,
            is : InputStream,
            druidQuerySvrConn: DruidClient,
            onDone : => Unit = (),
            fromList : Boolean = false) : CloseableIterator[ResultRow]

  def schemaFromQuerySpec(dInfo : DruidRelationInfo) : StructType

  def executeQuery(queryClient : DruidQueryServerClient) : CloseableIterator[ResultRow] =
    queryClient.executeQueryAsStream(this)

  def mapSparkColNameToDruidColName(druidRelationInfo: DruidRelationInfo) : Map[String, String] =
    Map()

}

abstract class AggQuerySpec extends QuerySpec {
  self : Product =>

  def dimensions : List[DimensionSpec] = Nil
  override def dimensionSpecs : List[DimensionSpec] = dimensions
  def aggregations : List[AggregationSpec] = Nil
  def postAggregations : Option[List[PostAggregationSpec]] = None

  def schemaFromQuerySpec(dInfo : DruidRelationInfo) : StructType = {
    val fields : List[StructField] = dimensionSpecs.map{ d =>
      new StructField(d.outputName, d.sparkDataType(dInfo.druidDS))
    } ++
      aggregations.map {a =>
        new StructField(a.name, a.sparkDataType(dInfo.druidDS))
      } ++
      postAggregations.map{ ps =>
        ps.map {p =>
          new StructField(p.name, p.sparkDataType(dInfo.druidDS))
        }
      }.getOrElse(Nil)
    StructType(fields)
  }

  override def apply(useSmile : Boolean,
                     is : InputStream,
                     druidQuerySvrConn: DruidClient,
                     onDone : => Unit = (),
                     fromList : Boolean = false) : CloseableIterator[ResultRow] =
    DruidQueryResultIterator(useSmile, is, onDone, fromList)

}

case class GroupByQuerySpec(
                             val queryType: String,
                             val dataSource: String,
                             override val dimensions: List[DimensionSpec],
                             val limitSpec: Option[LimitSpec],
                             val having: Option[HavingSpec],
                             val granularity: Either[String,GranularitySpec],
                             val filter: Option[FilterSpec],
                             override val aggregations: List[AggregationSpec],
                             override val postAggregations: Option[List[PostAggregationSpec]],
                             val intervals: List[String],
                             override val context : Option[QuerySpecContext]
                             ) extends AggQuerySpec {
  def this(dataSource: String,
           dimensions: List[DimensionSpec],
           limitSpec: Option[LimitSpec],
           having: Option[HavingSpec],
           granularity: Either[String,GranularitySpec],
           filter: Option[FilterSpec],
           aggregations: List[AggregationSpec],
           postAggregations: Option[List[PostAggregationSpec]],
           intervals: List[String],
           context : Option[QuerySpecContext]) = this("groupBy",
    dataSource, dimensions, limitSpec, having, granularity, filter,
    aggregations, postAggregations, intervals, context)

  def setIntervals(ins : List[Interval]) = this.copy(intervals = ins.map(_.toString))
  def intervalList: List[String] = intervals

  def setSegIntervals(segIns : List[(DruidSegmentInfo, Interval)]) : QuerySpec =
    GroupByQuerySpecWithSegIntervals(
      queryType,
      dataSource,
      dimensionSpecs,
      limitSpec,
      having,
      granularity,
      filter,
      aggregations,
      postAggregations,
      null,
      context
    ).setSegIntervals(segIns)

  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))
}
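
// Illustrative usage (a sketch; the data source and column names are hypothetical): a query
// like `select country, sum(clicks) from events where country = 'US' group by country`
// could be assembled with the auxiliary constructor, which fills in queryType = "groupBy":
//
//   val gbq = new GroupByQuerySpec(
//     "events",
//     List(new DefaultDimensionSpec("country")),
//     None, None,
//     Left("all"),
//     Some(new SelectorFilterSpec("country", "US")),
//     List(FunctionAggregationSpec("longSum", "sum_clicks", "clicks")),
//     None,
//     List("2000-01-01/3000-01-01"),
//     None)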

case class GroupByQuerySpecWithSegIntervals(
                             val queryType: String,
                             val dataSource: String,
                             override val dimensions: List[DimensionSpec],
                             val limitSpec: Option[LimitSpec],
                             val having: Option[HavingSpec],
                             val granularity: Either[String,GranularitySpec],
                             val filter: Option[FilterSpec],
                             override val aggregations: List[AggregationSpec],
                             override val postAggregations: Option[List[PostAggregationSpec]],
                             val intervals: SegmentIntervals,
                             override val context : Option[QuerySpecContext]
                           ) extends AggQuerySpec {

  override def intervalList: List[String] = intervals.segments.map(_.itvl)

  override def setSegIntervals(segInAssignments: List[(DruidSegmentInfo, Interval)]): QuerySpec = {
    this.copy(intervals = new SegmentIntervals(segInAssignments))
  }

  override def setIntervals(ins: List[Interval]): QuerySpec = ???
  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))
}

case class TimeSeriesQuerySpec(
                                val queryType: String,
                                val dataSource: String,
                                val intervals: List[String],
                                val granularity: Either[String,GranularitySpec],
                                val filter: Option[FilterSpec],
                                override val aggregations: List[AggregationSpec],
                                override val postAggregations: Option[List[PostAggregationSpec]],
                                override val context : Option[QuerySpecContext]
                                ) extends AggQuerySpec {
  def this(dataSource: String,
           intervals: List[String],
           granularity: Either[String,GranularitySpec],
           filters: Option[FilterSpec],
           aggregations: List[AggregationSpec],
           postAggregations: Option[List[PostAggregationSpec]],
           context : Option[QuerySpecContext]) = this("timeseries",
    dataSource, intervals, granularity, filters, aggregations, postAggregations, context)

  def setIntervals(ins : List[Interval]) = this.copy(intervals = ins.map(_.toString))
  def intervalList: List[String] = intervals

  def setSegIntervals(segIns : List[(DruidSegmentInfo, Interval)]) : QuerySpec =
    TimeSeriesQuerySpecWithSegIntervals(
      queryType,
      dataSource,
      granularity,
      filter,
      aggregations,
      postAggregations,
      null,
      context
    ).setSegIntervals(segIns)

  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))
}

case class TimeSeriesQuerySpecWithSegIntervals(
                                             val queryType: String,
                                             val dataSource: String,
                                             val granularity: Either[String,GranularitySpec],
                                             val filter: Option[FilterSpec],
                                             override val aggregations: List[AggregationSpec],
                                             override val postAggregations:
                                             Option[List[PostAggregationSpec]],
                                             val intervals: SegmentIntervals,
                                             override val context : Option[QuerySpecContext]
                                           ) extends AggQuerySpec {
  override def intervalList: List[String] = intervals.segments.map(_.itvl)

  override def setSegIntervals(segInAssignments: List[(DruidSegmentInfo, Interval)]): QuerySpec = {
    this.copy(intervals = new SegmentIntervals(segInAssignments))
  }

  override def setIntervals(ins: List[Interval]): QuerySpec = ???
  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))
}

case class TopNQuerySpec(
                          val queryType: String,
                          val dataSource: String,
                          val intervals: List[String],
                          val granularity: Either[String,GranularitySpec],
                          val filter: Option[FilterSpec],
                          override val aggregations: List[AggregationSpec],
                          override val postAggregations: Option[List[PostAggregationSpec]],
                          val dimension: DimensionSpec,
                          val threshold: Int,
                          val metric: TopNMetricSpec,
                          override val context : Option[QuerySpecContext]
                          ) extends AggQuerySpec {
  def this(dataSource: String,
           intervals: List[String],
           granularity: Either[String,GranularitySpec],
           filter: Option[FilterSpec],
           aggregations: List[AggregationSpec],
           postAggregations: Option[List[PostAggregationSpec]],
           dimension: DimensionSpec,
           threshold: Int,
           metric: TopNMetricSpec,
           context : Option[QuerySpecContext]) = this("topN", dataSource,
    intervals, granularity, filter, aggregations,
    postAggregations, dimension, threshold, metric, context)

  def setIntervals(ins : List[Interval]) = this.copy(intervals = ins.map(_.toString))
  def intervalList: List[String] = intervals

  def setSegIntervals(segIns : List[(DruidSegmentInfo, Interval)]) : QuerySpec =
    TopNQuerySpecWithSegIntervals(
      queryType,
      dataSource,
      null,
      granularity,
      filter,
      aggregations,
      postAggregations,
      dimension,
      threshold,
      metric,
      context
    ).setSegIntervals(segIns)

  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))

  override def dimensions : List[DimensionSpec] =
    List(dimension)

  override def apply(useSmile : Boolean,
                     is : InputStream,
                     druidQuerySvrConn: DruidClient,
                     onDone : => Unit = (),
                     fromList : Boolean = false) : CloseableIterator[ResultRow] =
    DruidTopNResultIterator(useSmile, is, onDone, fromList)
}

case class TopNQuerySpecWithSegIntervals(
                          val queryType: String,
                          val dataSource: String,
                          val intervals: SegmentIntervals,
                          val granularity: Either[String,GranularitySpec],
                          val filter: Option[FilterSpec],
                          override val aggregations: List[AggregationSpec],
                          override val postAggregations: Option[List[PostAggregationSpec]],
                          val dimension: DimensionSpec,
                          val threshold: Int,
                          val metric: TopNMetricSpec,
                          override val context : Option[QuerySpecContext]
                        ) extends AggQuerySpec {
  def this(dataSource: String,
           intervals: SegmentIntervals,
           granularity: Either[String,GranularitySpec],
           filter: Option[FilterSpec],
           aggregations: List[AggregationSpec],
           postAggregations: Option[List[PostAggregationSpec]],
           dimension: DimensionSpec,
           threshold: Int,
           metric: TopNMetricSpec,
           context : Option[QuerySpecContext]) = this("topN", dataSource,
    intervals, granularity, filter, aggregations,
    postAggregations, dimension, threshold, metric, context)

  def setIntervals(ins : List[Interval]) = ???
  def intervalList: List[String] = intervals.segments.map(_.itvl)

  def setSegIntervals(segInAssignments : List[(DruidSegmentInfo, Interval)]) : QuerySpec = {
    this.copy(intervals = new SegmentIntervals(segInAssignments))
  }
  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))

  override def dimensions : List[DimensionSpec] =
    List(dimension)

  override def apply(useSmile : Boolean,
                     is : InputStream,
                     druidQuerySvrConn: DruidClient,
                     onDone : => Unit = (),
                     fromList : Boolean = false) : CloseableIterator[ResultRow] =
    DruidTopNResultIterator(useSmile, is, onDone, fromList)

}

case class SearchQuerySpec(
                            val queryType: String,
                            val dataSource: String,
                            val intervals: List[String],
                            val granularity: Either[String,GranularitySpec],
                            val filter: Option[FilterSpec],
                            val searchDimensions : List[String],
                            val query : SearchQueryQuerySpec,
                            val limit : Int,
                            val sort : Option[SortSearchQuerySpec],
                            override val context : Option[QuerySpecContext]
                          ) extends AggQuerySpec {

  def this(dataSource: String,
           intervals: List[String],
           granularity: Either[String,GranularitySpec],
           filter: Option[FilterSpec],
           searchDimensions : List[String],
           query : SearchQueryQuerySpec,
           limit : Int,
           sort : Option[SortSearchQuerySpec],
           context : Option[QuerySpecContext]
  ) = this("search", dataSource, intervals, granularity,
    filter, searchDimensions, query, limit, sort, context)

  override def intervalList: List[String] = intervals

  override def setSegIntervals(segIns: List[(DruidSegmentInfo, Interval)]): QuerySpec =
    SearchQuerySpecWithSegIntervals(
      queryType,
      dataSource,
      null,
      granularity,
      filter,
      searchDimensions,
      query,
      limit,
      sort,
      context
    ).setSegIntervals(segIns)

  override def setIntervals(ins: List[Interval]): QuerySpec =
    this.copy(intervals = ins.map(_.toString))

  override def setFilter(fSpec: FilterSpec): QuerySpec = this.copy(filter = Some(fSpec))

  override def apply(useSmile : Boolean,
                     is : InputStream,
                     druidQuerySvrConn: DruidClient,
                     onDone : => Unit = (),
                     fromList : Boolean = false) : CloseableIterator[QueryResultRow] =
    new SearchQueryResultIterator(useSmile, is, onDone)


  override def dimensions : List[DimensionSpec] =
    searchDimensions.map(d => new DefaultDimensionSpec(d, d))

}

case class SearchQuerySpecWithSegIntervals(
                            val queryType: String,
                            val dataSource: String,
                            val intervals: SegmentIntervals,
                            val granularity: Either[String,GranularitySpec],
                            val filter: Option[FilterSpec],
                            val searchDimensions : List[String],
                            val query : SearchQueryQuerySpec,
                            val limit : Int,
                            val sort : Option[SortSearchQuerySpec],
                            override val context : Option[QuerySpecContext]
                          ) extends AggQuerySpec {


  def this(dataSource: String,
           intervals: SegmentIntervals,
           granularity: Either[String,GranularitySpec],
           filter: Option[FilterSpec],
           searchDimensions : List[String],
           query : SearchQueryQuerySpec,
           limit : Int,
           sort : Option[SortSearchQuerySpec] = None,
           context : Option[QuerySpecContext]
          ) =
    this("search", dataSource, intervals, granularity,
      filter, searchDimensions, query, limit, sort, context)

  override def intervalList: List[String] = intervals.segments.map(_.itvl)

  override def setSegIntervals(segInAssignments: List[(DruidSegmentInfo, Interval)]): QuerySpec = {
    this.copy(intervals = new SegmentIntervals(segInAssignments))
  }

  override def setIntervals(ins: List[Interval]): QuerySpec = ???

  override def setFilter(fSpec: FilterSpec): QuerySpec = this.copy(filter = Some(fSpec))

  override def apply(useSmile : Boolean,
                     is : InputStream,
                     druidQuerySvrConn: DruidClient,
                     onDone : => Unit = (),
                     fromList : Boolean = false) : CloseableIterator[QueryResultRow] =
    new SearchQueryResultIterator(useSmile, is, onDone)

  override def dimensions : List[DimensionSpec] =
    searchDimensions.map(n => new DefaultDimensionSpec(n,n))
}

case class PagingSpec(pagingIdentifiers : Map[String, Int],
                      threshold : Int,
                      fromNext : Option[Boolean] = None)

abstract class SelectSpec extends QuerySpec {
  self : Product =>
  def withPagingIdentifier(ps :  Map[String, Int]) : SelectSpec

  def dimensions : List[String]
  override def dimensionSpecs : List[DimensionSpec] =
    dimensions.map(n => new DefaultDimensionSpec(n,n))
  def metrics : List[String]

  def schemaFromQuerySpec(dInfo : DruidRelationInfo) : StructType = {
    val fields : List[StructField] = dimensions.map{ d =>
      val dc = dInfo.druidDS.columns(d)
      new StructField(d, DruidDataType.sparkDataType(dc.dataType))
    } ++ metrics.map{d =>
      val dc = dInfo.druidDS.columns(d)
      new StructField(d, DruidDataType.sparkDataType(dc.dataType))
    }
    StructType(fields)
  }

  override def apply(useSmile : Boolean,
                     is : InputStream,
                     druidQuerySvrConn: DruidClient,
                     onDone : => Unit = (),
                     fromList : Boolean = false) : CloseableIterator[ResultRow] =
    DruidSelectResultIterator(useSmile, is, this,
      druidQuerySvrConn.asInstanceOf[DruidQueryServerClient], onDone, fromList
    )

  override def mapSparkColNameToDruidColName(druidRelationInfo: DruidRelationInfo) :
  Map[String, String] =
    druidRelationInfo.sourceToDruidMapping.mapValues{
      case dc if dc.name == DruidDataSource.TIME_COLUMN_NAME =>
        DruidDataSource.EVENT_TIMESTAMP_KEY_NAME
      case dc => dc.name
    }

  def descending : Boolean

}

case class SelectSpecWithIntervals(queryType: String,
                                   dataSource: String,
                                   dimensions: List[String],
                                   metrics: List[String],
                                   filter: Option[FilterSpec],
                                   pagingSpec : PagingSpec,
                                   intervals: List[String] = List(),
                                   descending : Boolean = false,
                                   granularity : String = "all",
                                   override val context : Option[QuerySpecContext])
  extends SelectSpec {

  def this(dataSource: String,
           dimensions: List[String],
           metrics: List[String],
           filter: Option[FilterSpec],
           pagingSpec : PagingSpec,
           intervals: List[String] = List(),
           descending : Boolean = false,
           granularity : String = "all",
           context : Option[QuerySpecContext]) = this(
    "select", dataSource, dimensions, metrics, filter, pagingSpec,
    intervals, descending, granularity, context
  )

  def withPagingIdentifier(ps :  Map[String, Int]) = {
    copy(pagingSpec = pagingSpec.copy(pagingIdentifiers = ps, fromNext = Some(true)))
  }

  override def setIntervals(ins : List[Interval]) = this.copy(intervals = ins.map(_.toString))

  override def setSegIntervals(segIns : List[(DruidSegmentInfo, Interval)]) : QuerySpec =
    SelectSpecWithSegmentIntervals(
      queryType,
      dataSource,
      dimensions,
      metrics,
      filter,
      pagingSpec,
      null,
      descending,
      granularity,
      context
    ).setSegIntervals(segIns)

  def intervalList: List[String] = intervals

  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))
}

case class SelectSpecWithSegmentIntervals(queryType: String,
                                          dataSource: String,
                                          dimensions: List[String],
                                          metrics: List[String],
                                          filter: Option[FilterSpec],
                                          pagingSpec : PagingSpec,
                                          intervals: SegmentIntervals,
                                          descending : Boolean = false,
                                          granularity : String = "all",
                                          override val context : Option[QuerySpecContext])
  extends SelectSpec {

  def withPagingIdentifier(ps :  Map[String, Int]) = {
    copy(pagingSpec = pagingSpec.copy(pagingIdentifiers = ps, fromNext = Some(true)))
  }

  override def setSegIntervals(segInAssignments: List[(DruidSegmentInfo, Interval)]): QuerySpec = {
    this.copy(intervals = new SegmentIntervals(segInAssignments))
  }

  override def setIntervals(ins: List[Interval]): QuerySpec = ???

  override def intervalList: List[String] = intervals.segments.map(_.itvl)

  def setFilter(fSpec : FilterSpec) : QuerySpec = this.copy(filter = Some(fSpec))

}

/**
  * Used for testing only.
  */
case class DummyQuerySpec(val queryType : String = null) extends QuerySpec {

  override val dataSource: String = null

  override def intervalList: List[String] = ???

  override def setIntervals(ins: List[Interval]): QuerySpec = ???

  override def setSegIntervals(segIns: List[(DruidSegmentInfo, Interval)]): QuerySpec = ???

  override def filter: Option[FilterSpec] = ???

  override def setFilter(fSpec: FilterSpec): QuerySpec = ???

  override def context: Option[QuerySpecContext] = ???

  override def schemaFromQuerySpec(dInfo: DruidRelationInfo): StructType = ???

  def apply(useSmile : Boolean,
            is : InputStream,
            druidQuerySvrConn: DruidClient,
            onDone : => Unit = (),
            fromList : Boolean = false) : CloseableIterator[ResultRow] =
    new DummyResultIterator()
}
