/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sparklinedata.druid
import java.util.concurrent.atomic.AtomicLong
import scala.collection.mutable.{Map => MMap}
import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression}
import org.apache.spark.sql.catalyst.plans.logical.Aggregate
import org.apache.spark.sql.types.DataType
import org.sparklinedata.druid.metadata.{DruidRelationColumn, DruidRelationInfo}
/**
 * Builds up the pieces of a Druid query as a Spark logical plan is translated.
 *
 * @param drInfo metadata about the Druid relation being queried.
 * @param queryIntervals the time intervals the query covers.
 * @param referencedDruidColumns the Druid columns referenced so far, keyed by
 *                               source column name.
 * @param dimensions the dimensions to group by.
 * @param limitSpec optional limit and ordering specification.
 * @param havingSpec optional having-clause specification.
 * @param granularitySpec the query granularity; defaults to "all".
 * @param filterSpec optional filter specification.
 * @param aggregations the aggregations to compute.
 * @param postAggregations optional post-aggregations computed from the
 *                         aggregation results.
 * @param projectionAliasMap map from projected alias name to underlying column name.
 * @param outputAttributeMap map from output attribute name to the Expression it
 *                           represents, the DataType in the original Plan, the
 *                           DataType from Druid, and the name of the transform
 *                           applied to the Druid value.
 * @param avgExpressions avg expressions to perform in the Project operator on
 *                       top of the Druid physical scan, as
 *                       (sumAlias, countAlias) pairs.
 * @param aggExprToLiteralExpr for expressions that represent a 'null' value for
 *                             this GroupingSet or represent the 'grouping__id'
 *                             column, this is a map to the Literal value that is
 *                             filled in the Projection above the DruidRDD.
 * @param aggregateOper the original Aggregate operator, when this builder
 *                      represents an aggregation query.
 * @param curId counter used to generate unique aliases.
 * @param origProjList the original projection list, kept when projections could
 *                     not be fully pushed to Druid.
 * @param origFilter the original filter expression, kept when filters could not
 *                   be fully pushed to Druid.
 * @param hasUnpushedProjections true if some projections were not pushed to Druid.
 * @param hasUnpushedFilters true if some filters were not pushed to Druid.
 */
case class DruidQueryBuilder(drInfo: DruidRelationInfo,
queryIntervals: QueryIntervals,
referencedDruidColumns : MMap[String,DruidRelationColumn] = MMap(),
dimensions: List[DimensionSpec] = Nil,
limitSpec: Option[LimitSpec] = None,
havingSpec: Option[HavingSpec] = None,
granularitySpec: Either[String, GranularitySpec] = Left("all"),
filterSpec: Option[FilterSpec] = None,
aggregations: List[AggregationSpec] = Nil,
postAggregations: Option[List[PostAggregationSpec]] = None,
projectionAliasMap: Map[String, String] = Map(),
outputAttributeMap:
Map[String, (Expression, DataType, DataType, String)] = Map(),
avgExpressions: Map[Expression, (String, String)] = Map(),
aggExprToLiteralExpr: Map[Expression, Expression] = Map(),
aggregateOper: Option[Aggregate] = None,
curId: AtomicLong = new AtomicLong(-1),
origProjList : Option[Seq[NamedExpression]] = None,
origFilter : Option[Expression] = None,
hasUnpushedProjections : Boolean = false,
hasUnpushedFilters : Boolean = false) {
def hasUnpushedExpressions = hasUnpushedProjections || hasUnpushedFilters
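  // The builder methods below each return a copy with the given spec added;
  // note that referencedDruidColumns and curId are shared across copies.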
def dimension(d: DimensionSpec) = {
this.copy(dimensions = (dimensions :+ d))
}
def limit(l: LimitSpec) = {
this.copy(limitSpec = Some(l))
}
def having(h: HavingSpec) = {
this.copy(havingSpec = Some(h))
}
def granularity(g: GranularitySpec) = {
this.copy(granularitySpec = Right(g))
}
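  // Successive filter calls are combined into a single logical AND.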
def filter(f: FilterSpec) = filterSpec match {
case Some(f1) =>
this.copy(filterSpec = Some(LogicalFilterSpec("and", List(f1, f))))
case None => this.copy(filterSpec = Some(f))
}
def aggregate(a: AggregationSpec) = {
this.copy(aggregations = (aggregations :+ a))
}
def postAggregate(p: PostAggregationSpec) = postAggregations match {
case None => this.copy(postAggregations = Some(List(p)))
case Some(pAs) => this.copy(postAggregations = Some(pAs :+ p))
}
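  // Folds an interval condition into the query intervals; returns None when the
  // condition cannot be combined with the intervals accumulated so far.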
def interval(iC: IntervalCondition): Option[DruidQueryBuilder] = iC.typ match {
case IntervalConditionType.LT =>
queryIntervals.ltCond(iC.dt).map(qI => this.copy(queryIntervals = qI))
case IntervalConditionType.LTE =>
queryIntervals.ltECond(iC.dt).map(qI => this.copy(queryIntervals = qI))
case IntervalConditionType.GT =>
queryIntervals.gtCond(iC.dt).map(qI => this.copy(queryIntervals = qI))
case IntervalConditionType.GTE =>
queryIntervals.gtECond(iC.dt).map(qI => this.copy(queryIntervals = qI))
}
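  // Records an output attribute; when no transform name is given, one is chosen
  // based on the Druid DataType (via DruidValTransform.getTFName).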
def outputAttribute(nm: String, e: Expression, originalDT: DataType,
druidDT: DataType, tfName: String = null) = {
val tf = if (tfName == null) DruidValTransform.getTFName(druidDT) else tfName
this.copy(outputAttributeMap = outputAttributeMap + (nm ->(e, originalDT, druidDT, tf)))
}
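  // An avg is pushed to Druid as separate sum and count aggregations; the
  // division happens in the Project operator above the Druid scan.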
def avgExpression(e: Expression, sumAlias : String, cntAlias : String) = {
this.copy(avgExpressions = avgExpressions + (e ->(sumAlias, cntAlias)))
}
def aggregateOp(op: Aggregate) = this.copy(aggregateOper = Some(op))
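  // Aliases come from a shared decrementing counter, so successive calls never
  // repeat; nextAlias(cn) also loops to avoid clashing with source column names.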
def nextAlias: String = s"alias${curId.getAndDecrement()}"
def nextAlias(cn: String): String = {
var oAttrName = cn + nextAlias
while (drInfo.sourceToDruidMapping.contains(oAttrName)) {
oAttrName = cn + nextAlias
}
oAttrName
}
def isDruidNonTimeDimension(name : String) : Boolean = {
druidColumn(name).map(_.isDimension(true)).getOrElse(false)
}
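  // Resolves a source column (through any projection aliases) to its Druid
  // column; as a side effect it records the column in referencedDruidColumns.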
def druidColumn(name: String): Option[DruidRelationColumn] = {
drInfo.sourceToDruidMapping.get(projectionAliasMap.getOrElse(name, name)).map { dc =>
referencedDruidColumns(name) = dc
dc
}
}
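  // Registers a projection alias; chains of aliases are flattened so the map
  // always points at the underlying Druid column name.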
def addAlias(alias: String, col: String) = {
val dColNm = projectionAliasMap.getOrElse(col, col)
this.copy(projectionAliasMap = (projectionAliasMap + (alias -> dColNm)))
}
def orderBy(dimName: String, ascending: Boolean): DruidQueryBuilder = limitSpec match {
case Some(LimitSpec(t, l, columns)) => limit(LimitSpec(t, l,
columns :+ new OrderByColumnSpec(dimName, ascending)))
case None => limit(new LimitSpec(Int.MaxValue,
new OrderByColumnSpec(dimName, ascending)))
}
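  // A pushed limit is only accepted when it doesn't conflict with a limit that
  // is already part of the LimitSpec.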
def limit(amt: Int): Option[DruidQueryBuilder] = limitSpec match {
case Some(LimitSpec(t, l, columns)) if (l == Int.MaxValue || l == amt) =>
Some(limit(LimitSpec(t, amt, columns)))
case _ => None
}
/**
 * Currently we don't transform queries with post-aggregations, a [[LimitSpec]],
 * or a [[HavingSpec]] into post-Druid operations in Spark, so only queries
 * without them can be pushed down to historical servers.
 *
 * @return true if this query can be executed directly against Druid historicals.
 */
def canPushToHistorical = !(
postAggregations.isDefined ||
limitSpec.isDefined ||
havingSpec.isDefined
)
}
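/**
 * Creates a builder for the given relation with an initial [[QueryIntervals]].
 *
 * A minimal usage sketch (the relation info and column name are illustrative,
 * not taken from a real plan; only methods defined above are used):
 * {{{
 *   val qb = DruidQueryBuilder(drInfo)                  // fresh builder
 *   val ordered = qb.orderBy("page", ascending = false) // adds an OrderByColumnSpec
 *   val limited: Option[DruidQueryBuilder] = ordered.limit(100)
 * }}}
 */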
object DruidQueryBuilder {
def apply(drInfo: DruidRelationInfo) =
new DruidQueryBuilder(drInfo, new QueryIntervals(drInfo))
}