
org.sparklinedata.druid.metadata.DruidRelationColumn.scala Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sparklinedata.druid.metadata
import org.sparklinedata.druid.{DruidDataSourceException, Utils}
/**
 * Name-based description of a source column's membership in a Druid spatial index.
 *
 * The Druid column is referenced by name only; the companion of `DruidRelationColumn`
 * looks the name up in the data source and turns it into a `SpatialDruidDimension`.
 *
 * @param druidColumn     name of the Druid column that backs the spatial index
 * @param spatialPosition position of this column within the spatial index
 *                        (presumably the axis ordinal - confirm against callers)
 * @param minValue        optional minimum value for this axis; how it is used is not
 *                        visible in this file
 * @param maxValue        optional maximum value for this axis; how it is used is not
 *                        visible in this file
 */
case class SpatialDruidDimensionInfo(
    druidColumn: String,
    spatialPosition: Int,
    minValue: Option[Double],
    maxValue: Option[Double]
)
/**
 * Resolved form of a spatial-index link: the same fields as
 * `SpatialDruidDimensionInfo`, but holding the actual Druid dimension
 * instead of its name.
 *
 * @param druidColumn     the Druid dimension that backs the spatial index
 * @param spatialPosition position of the source column within the spatial index
 *                        (presumably the axis ordinal - confirm against callers)
 * @param minValue        optional minimum value for this axis; how it is used is not
 *                        visible in this file
 * @param maxValue        optional maximum value for this axis; how it is used is not
 *                        visible in this file
 */
case class SpatialDruidDimension(
    druidColumn: DruidDimension,
    spatialPosition: Int,
    minValue: Option[Double],
    maxValue: Option[Double]
)
/**
 * Name-based description of how a source column is linked to the Druid index.
 *
 * All Druid-side references are plain names; `DruidRelationColumn.apply` resolves
 * them against a Druid data source to build a `DruidRelationColumn`.
 *
 * @param column              the source column name
 * @param druidColumn         name of the Druid column this source column maps to
 *                            directly, if any
 * @param spatialIndex        spatial index membership of this column, if any
 * @param hllMetric           name of the Druid metric holding the HLL aggregation
 *                            for this column, if any
 * @param sketchMetric        name of the Druid metric holding the sketch aggregation
 *                            for this column, if any
 * @param cardinalityEstimate optional cardinality estimate, carried over unchanged
 *                            into the resolved `DruidRelationColumn`
 */
case class DruidRelationColumnInfo(
    column: String,
    druidColumn: Option[String],
    spatialIndex: Option[SpatialDruidDimensionInfo] = None,
    hllMetric: Option[String] = None,
    sketchMetric: Option[String] = None,
    cardinalityEstimate: Option[Long] = None
)
/**
* Captures the link(s) of a source column to the Druid Index.
*
* A column can have several kinds of links to the Druid Index:
* - it can be a Druid Dimension, possibly the Time Dimension of the Druid Index.
* - it can be a component(axis) of a Spatial Index in Druid
* - its value can be stored as a HLL Aggregation in Druid.
* - its value can be stored in a Sketch Aggregation in Druid.
*
* A column can have multiple links, for example:
* - a latitude column can be both a Druid Dimension and be an axis in a Spatial Index.
* - a column can be both a Dimension and have an HLL and/or Sketch.
*
* @param column the source column
* @param druidColumn the direct link of this source column to a Druid Dimension or Metric.
* @param spatialIndex the spatial index for this column.
* @param hllMetric the hll Metric for this column
* @param sketchMetric the sketch for this column
* @param cardinalityEstimate user provided cardinality estimate.
*/
case class DruidRelationColumn(
    column: String,
    druidColumn: Option[DruidColumn],
    spatialIndex: Option[SpatialDruidDimension] = None,
    hllMetric: Option[DruidMetric] = None,
    sketchMetric: Option[DruidMetric] = None,
    cardinalityEstimate: Option[Long] = None
) {

  // The Druid column that name/size/cardinality are read from. Candidates are offered in
  // the order: direct column, HLL metric, sketch metric, spatial index dimension.
  // NOTE(review): relies on Utils.filterSomes keeping only the defined entries in order -
  // confirm. `.head.get` fails if no link at all is defined.
  private lazy val druidColumnToUse: DruidColumn = {
    Utils.filterSomes(
      Seq(druidColumn, hllMetric, sketchMetric, spatialIndex.map(_.druidColumn)).toList
    ).head.get
  }

  /** True when this column is linked directly to a Druid column. */
  def hasDirectDruidColumn: Boolean = druidColumn.isDefined

  /** True when this column is part of a spatial index. */
  def hasSpatialIndex: Boolean = spatialIndex.isDefined

  /** True when an HLL metric is linked to this column. */
  def hasHLLMetric: Boolean = hllMetric.isDefined

  /** True when a sketch metric is linked to this column. */
  def hasSketchMetric: Boolean = sketchMetric.isDefined

  /** Name of the Druid column selected by `druidColumnToUse`. */
  def name = druidColumnToUse.name

  /** `Float` for any column with a spatial index, otherwise the selected Druid column's type. */
  def dataType = if (hasSpatialIndex) DruidDataType.Float else druidColumnToUse.dataType

  /** Size reported by the selected Druid column. */
  def size = druidColumnToUse.size

  // The supplied estimate wins; otherwise fall back to the Druid column's own cardinality.
  // This is an eager val, so when no estimate is given the Druid column is resolved
  // during construction.
  val cardinality: Long = cardinalityEstimate.getOrElse(druidColumnToUse.cardinality)

  /**
   * @param excludeTime forwarded unchanged to `DruidColumn.isDimension`
   * @return true only if there is a direct Druid column and it reports itself as a dimension
   */
  def isDimension(excludeTime: Boolean = false): Boolean = {
    hasDirectDruidColumn && druidColumnToUse.isDimension(excludeTime)
  }

  /** True only if there is a direct Druid column and it is a `DruidTimeDimension`. */
  def isTimeDimension: Boolean = {
    hasDirectDruidColumn && druidColumnToUse.isInstanceOf[DruidTimeDimension]
  }

  /** True only if there is a direct Druid column and it is not a dimension. */
  def isMetric: Boolean = hasDirectDruidColumn && !isDimension(false)

  /** The selected Druid column cast to `DruidMetric`; fails with a cast error if it is not one. */
  def metric: DruidMetric = druidColumnToUse.asInstanceOf[DruidMetric]
}
object DruidRelationColumn {

  /**
   * Resolves the name-based links in `colInfo` against `druidDS`.
   *
   * Supported shapes of `colInfo`:
   *  - only a direct Druid column: the time dimension when the name equals
   *    `timeDimensionCol`, otherwise any column present in `druidDS.columns`;
   *  - a spatial index (optionally with a direct column) whose Druid column exists and
   *    is a dimension;
   *  - an HLL and/or sketch metric (optionally with a direct column), each of which must
   *    exist in `druidDS.columns` and must not be a dimension.
   *
   * Any other combination, or any named link that cannot be resolved, yields `None`.
   *
   * @param druidDS          the Druid data source to resolve names against
   * @param timeDimensionCol the Druid column name that denotes the time dimension
   * @param colInfo          the name-based link description for one source column
   * @return the resolved column, or `None` if the links are unsupported or unresolvable
   */
  def apply(druidDS: DruidDataSource,
            timeDimensionCol: String,
            colInfo: DruidRelationColumnInfo
           ): Option[DruidRelationColumn] = {
    (colInfo.druidColumn, colInfo.spatialIndex, colInfo.hllMetric, colInfo.sketchMetric) match {
      case (Some(dC), None, None, None) if dC == timeDimensionCol =>
        Some(directOnly(colInfo, druidDS.timeDimension.get))

      case (Some(dC), None, None, None) if druidDS.columns.contains(dC) =>
        Some(directOnly(colInfo, druidDS.columns(dC)))

      case (_, Some(sI), None, None)
        if druidDS.columns.contains(sI.druidColumn) &&
          druidDS.columns(sI.druidColumn).isDimension() =>
        resolveDirectLink(druidDS, timeDimensionCol, colInfo.copy(spatialIndex = None)).map {
          directCol =>
            new DruidRelationColumn(colInfo.column,
              directCol,
              Some(
                SpatialDruidDimension(
                  druidDS.columns(sI.druidColumn).asInstanceOf[DruidDimension],
                  sI.spatialPosition, sI.minValue, sI.maxValue)
              ),
              None, None,
              colInfo.cardinalityEstimate
            )
        }

      case (_, None, hllMetric, sketchMetric)
        if hllMetric.isDefined || sketchMetric.isDefined =>
        // The guard guarantees at least one metric name is given, and every given name
        // must resolve, so a successful result always carries at least one metric.
        for {
          directCol <- resolveDirectLink(druidDS, timeDimensionCol,
            colInfo.copy(hllMetric = None, sketchMetric = None))
          hllM <- resolveMetric(druidDS, hllMetric)
          // Fix: the sketch lookup used to be guarded by the (always empty) result
          // variable instead of the requested name, so it never ran.
          sketchM <- resolveMetric(druidDS, sketchMetric)
        } yield new DruidRelationColumn(colInfo.column,
          directCol,
          None,
          hllM, sketchM,
          colInfo.cardinalityEstimate
        )

      case _ => None
    }
  }

  /**
   * Wraps a Druid column as a relation column of the same name, with a direct link only.
   *
   * @param dC the Druid column to wrap
   * @return a relation column with no spatial index, metrics or cardinality estimate
   */
  def apply(dC: DruidColumn): DruidRelationColumn = {
    new DruidRelationColumn(dC.name,
      Some(dC),
      None,
      None, None,
      None
    )
  }

  // Builds a relation column that has only a direct link to `dColumn`.
  private def directOnly(colInfo: DruidRelationColumnInfo,
                         dColumn: DruidColumn): DruidRelationColumn = {
    new DruidRelationColumn(colInfo.column,
      Some(dColumn),
      None, None, None,
      colInfo.cardinalityEstimate
    )
  }

  // Resolves the optional direct link described by `directInfo` (which must carry no
  // spatial/metric links). Outer None: a direct column was named but could not be
  // resolved. Some(None): no direct column was named.
  private def resolveDirectLink(druidDS: DruidDataSource,
                                timeDimensionCol: String,
                                directInfo: DruidRelationColumnInfo
                               ): Option[Option[DruidColumn]] = {
    if (directInfo.druidColumn.isDefined) {
      apply(druidDS, timeDimensionCol, directInfo).map(_.druidColumn)
    } else {
      Some(None)
    }
  }

  // Resolves an optional metric name. Outer None: the name is unknown to the data source
  // or refers to a dimension. Some(None): no name was given.
  private def resolveMetric(druidDS: DruidDataSource,
                            metricName: Option[String]
                           ): Option[Option[DruidMetric]] = {
    metricName match {
      case None => Some(None)
      case Some(mN) if druidDS.columns.contains(mN) && !druidDS.columns(mN).isDimension() =>
        Some(Some(druidDS.columns(mN).asInstanceOf[DruidMetric]))
      case _ => None
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy