Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
com.crobox.clickhouse.dsl.language.ClickhouseTokenizerModule.scala Maven / Gradle / Ivy
package com.crobox.clickhouse.dsl.language
import com.crobox.clickhouse.ClickhouseServerVersion
import com.crobox.clickhouse.dsl.JoinQuery._
import com.crobox.clickhouse.dsl._
import com.crobox.clickhouse.time.{MultiDuration, TimeUnit, TotalDuration}
import com.typesafe.scalalogging.Logger
import org.joda.time.{DateTime, DateTimeZone}
import org.slf4j.LoggerFactory
import scala.jdk.CollectionConverters._
case class TokenizeContext(
version: ClickhouseServerVersion,
var joinNr: Int = 0,
var tableAliases: Map[Table, String] = Map.empty,
var useTableAlias: Boolean = false,
delimiter: String = ", "
) {
def incrementJoinNumber(): Unit = joinNr += 1
def tableAlias(table: Table): String =
if (useTableAlias) {
tableAliases.getOrElse(
table, {
val alias = " AS " + ClickhouseStatement.quoteIdentifier("T" + (tableAliases.size + 1))
tableAliases += (table -> alias)
alias
}
)
} else ""
def setTableAlias(value: Boolean): TokenizeContext =
// change this object since we want to maintain all tableAliases over all joins
if (true) {
useTableAlias = value
this
} else {
copy(useTableAlias = value)
}
def leftAlias(alias: Option[String]): String = ClickhouseStatement.quoteIdentifier(alias.getOrElse("L" + joinNr))
def rightAlias(alias: Option[String]): String = ClickhouseStatement.quoteIdentifier(alias.getOrElse("R" + joinNr))
}
trait ClickhouseTokenizerModule
extends TokenizerModule
with AggregationFunctionTokenizer
with ArithmeticFunctionTokenizer
with ArrayFunctionTokenizer
with BitFunctionTokenizer
with ComparisonFunctionTokenizer
with DateTimeFunctionTokenizer
with DictionaryFunctionTokenizer
with EncodingFunctionTokenizer
with HashFunctionTokenizer
with HigherOrderFunctionTokenizer
with IPFunctionTokenizer
with InFunctionTokenizer
with JsonFunctionTokenizer
with LogicalFunctionTokenizer
with MathematicalFunctionTokenizer
with MiscellaneousFunctionTokenizer
with NullableFunctionTokenizer
with RandomFunctionTokenizer
with RoundingFunctionTokenizer
with SplitMergeFunctionTokenizer
with StringFunctionTokenizer
with StringSearchFunctionTokenizer
with TypeCastFunctionTokenizer
with URLFunctionTokenizer
with EmptyFunctionTokenizer {
private lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName))
protected def tokenizeSeqCol(col1: Column, columns: Column*)(implicit ctx: TokenizeContext): String = {
val prefix = if (columns.isEmpty) "" else ", "
tokenizeColumn(col1) + prefix + tokenizeSeqCol(columns: _*)
}
protected def tokenizeSeqCol(columns: Column*)(implicit ctx: TokenizeContext): String =
columns.map(tokenizeColumn).mkString(ctx.delimiter)
override def toSql(query: InternalQuery, formatting: Option[String] = Some("JSON"))(implicit
ctx: TokenizeContext
): String = {
val formatSql = formatting.map(fmt => " FORMAT " + fmt).getOrElse("")
val sql = removeRedundantWhitespaces(toRawSql(query) + formatSql)
logger.debug(s"Generated sql [$sql]")
sql
}
private def removeSurroundingBrackets(value: String): String =
if (value.startsWith("(") && value.endsWith(")") && value.count(_ == '(') == 1 && value.count(_ == ')') == 1) {
value.substring(1, value.length - 1)
} else value
private def removeRedundantWhitespaces(value: String): String =
value
.replaceAll("\\s+", " ")
.replace(" ( ", " (")
.replace(" )", ")")
.trim
private[language] def toRawSql(query: InternalQuery)(implicit ctx: TokenizeContext): String =
query match {
case InternalQuery(select, from, prewhere, where, groupBy, having, join, orderBy, limit, union) =>
s"""
|${tokenizeSelect(select)}
| ${tokenizeFrom(from)}
| ${tokenizeJoin(select, from, join)}
| ${tokenizeFiltering(prewhere, "PREWHERE")}
| ${tokenizeFiltering(where, "WHERE")}
| ${tokenizeGroupBy(groupBy)}
| ${tokenizeFiltering(having, "HAVING")}
| ${tokenizeOrderBy(orderBy)}
| ${tokenizeLimit(limit)}
| ${tokenizeUnionAll(union)}""".stripMargin
}
private def tokenizeUnionAll(unions: Seq[OperationalQuery])(implicit ctx: TokenizeContext): String =
if (unions.nonEmpty) unions.map(q => s"UNION ALL ${toRawSql(q.internalQuery)}").mkString else ""
private def tokenizeSelect(select: Option[SelectQuery])(implicit ctx: TokenizeContext): String =
select match {
case Some(s) => s"SELECT ${s.modifier} ${tokenizeColumns(s.columns)}"
case _ => ""
}
private def tokenizeFrom(from: Option[FromQuery], withPrefix: Boolean = true)(implicit
ctx: TokenizeContext
): String = {
require(from != null)
val fromClause = from match {
case Some(query: InnerFromQuery) => s"(${toRawSql(query.innerQuery.internalQuery).trim})"
case Some(table: TableFromQuery[_]) => table.table.quoted + ctx.tableAlias(table.table)
case _ => return ""
}
val prefix = if (withPrefix) "FROM" else ""
val alias = from.flatMap(_.alias.map(s => " AS " + ClickhouseStatement.quoteIdentifier(s))).getOrElse("")
val asF = if (from.exists(_.finalized)) " FINAL" else ""
s"$prefix $fromClause $alias $asF".trim
}
protected def tokenizeColumn(column: Column)(implicit ctx: TokenizeContext): String = {
require(column != null)
column match {
case EmptyColumn => ""
case alias: AliasedColumn[_] =>
val originalColumnToken = tokenizeColumn(alias.original)
if (originalColumnToken.isEmpty) alias.quoted else s"$originalColumnToken AS ${alias.quoted}"
case tuple: TupleColumn[_] => s"(${tuple.elements.map(tokenizeColumn).mkString(ctx.delimiter)})"
case col: ExpressionColumn[_] => tokenizeExpressionColumn(col)
case col: Column => col.quoted
}
}
private def tokenizeExpressionColumn(inCol: ExpressionColumn[_])(implicit ctx: TokenizeContext): String =
inCol match {
case agg: AggregateFunction[_] => tokenizeAggregateFunction(agg)
case col: ArithmeticFunctionCol[_] => tokenizeArithmeticFunctionColumn(col)
case col: ArithmeticFunctionOp[_] => tokenizeArithmeticFunctionOperator(col)
case col: ArrayFunction => tokenizeArrayFunction(col)
case col: BitFunction => tokenizeBitFunction(col)
case col: ComparisonColumn => tokenizeComparisonColumn(col)
case col: DateTimeFunctionCol[_] => tokenizeDateTimeColumn(col)
case col: DateTimeConst[_] => tokenizeDateTimeConst(col)
case col: DictionaryFuncColumn[_] => tokenizeDictionaryFunction(col)
case col: EmptyFunction[_] => tokenizeEmptyCol(col)
case col: EncodingFunction[_] => tokenizeEncodingFunction(col)
case col: HashFunction => tokenizeHashFunction(col)
case col: HigherOrderFunction[_, _, _] => tokenizeHigherOrderFunction(col)
case col: IPFunction[_] => tokenizeIPFunction(col)
case col: InFunction => tokenizeInFunction(col)
case col: JsonFunction[_] => tokenizeJsonFunction(col)
case col: LogicalFunction => tokenizeLogicalFunction(col)
case col: MathFuncColumn => tokenizeMathematicalFunction(col)
case col: MiscellaneousFunction => tokenizeMiscellaneousFunction(col)
case col: NullableFunction => tokenizeNullableFunction(col)
case col: RandomFunction => tokenizeRandomFunction(col)
case col: RoundingFunction => tokenizeRoundingFunction(col)
case col: SplitMergeFunction[_] => tokenizeSplitMergeFunction(col)
case col: StringFunctionCol[_] => tokenizeStringCol(col)
case col: StringSearchFunc[_] => tokenizeStringSearchFunction(col)
case col: TypeCastColumn[_] => tokenizeTypeCastColumn(col)
case col: URLFunction[_] => tokenizeURLFunction(col)
case All() => "*"
case RawColumn(rawSql) => rawSql
case Conditional(cases, default, multiIf) =>
if (multiIf) {
s"${if (cases.size > 1) "multiIf" else "if"}(${cases
.map(`case` => s"${tokenizeColumn(`case`.condition)}, ${tokenizeColumn(`case`.result)}")
.mkString(", ")}, ${tokenizeColumn(default)})"
} else {
s"CASE ${cases
.map(`case` => s"WHEN ${tokenizeColumn(`case`.condition)} THEN ${tokenizeColumn(`case`.result)}")
.mkString(" ")} ELSE ${tokenizeColumn(default)} END"
}
case c: Const[_] => c.parsed
case a @ _ =>
throw new NotImplementedError(
a.getClass.getCanonicalName + " with superclass " + a.getClass.getSuperclass.getCanonicalName + " could not be matched."
)
}
private[language] def tokenizeTimeSeries(timeSeries: TimeSeries)(implicit ctx: TokenizeContext): String = {
val column = tokenizeColumn(timeSeries.tableColumn)
tokenizeDuration(timeSeries, column)
}
private def tokenizeDuration(timeSeries: TimeSeries, column: String): String = {
val interval = timeSeries.interval
val dateZone = determineZoneId(interval.rawStart)
def convert(fn: String): String = s"$fn(toDateTime($column / 1000), '$dateZone')"
def toDateTime(inner: String): String = s"toDateTime($inner, '$dateZone')"
def toUnixTimestamp(inner: String): String = s"toUnixTimestamp($inner, '$dateZone')"
interval.duration match {
case TotalDuration => s"${interval.getStartMillis}"
case MultiDuration(1, TimeUnit.Year) => toDateTime(convert("toStartOfYear"))
case MultiDuration(1, TimeUnit.Quarter) => toDateTime(convert("toStartOfQuarter"))
case MultiDuration(1, TimeUnit.Month) => toDateTime(convert("toStartOfMonth"))
case MultiDuration(1, TimeUnit.Week) => toDateTime(convert("toMonday"))
case MultiDuration(1, TimeUnit.Day) => convert("toStartOfDay")
case MultiDuration(1, TimeUnit.Hour) => convert("toStartOfHour")
case MultiDuration(1, TimeUnit.Minute) => convert("toStartOfMinute")
case MultiDuration(1, TimeUnit.Second) => toDateTime(s"$column / 1000")
case MultiDuration(nth, TimeUnit.Year) =>
toDateTime(s"subtractYears(${convert("toStartOfYear")}, ${convert("toRelativeYearNum")} % $nth)")
case MultiDuration(nth, TimeUnit.Quarter) =>
toDateTime(s"subtractMonths(${convert("toStartOfQuarter")}, (${convert("toRelativeQuarterNum")} % $nth) * 3)")
case MultiDuration(nth, TimeUnit.Month) =>
toDateTime(s"subtractMonths(${convert("toStartOfMonth")}, ${convert("toRelativeMonthNum")} % $nth)")
case MultiDuration(nth, TimeUnit.Week) =>
toDateTime(s"subtractWeeks(${convert("toMonday")}, (${convert("toRelativeWeekNum")} - 1) % $nth)")
case MultiDuration(nth, TimeUnit.Day) =>
s"subtractDays(${convert("toStartOfDay")}, ${convert("toRelativeDayNum")} % $nth, '$dateZone')"
case MultiDuration(nth, TimeUnit.Hour) =>
s"subtractHours(${convert("toStartOfHour")}, ${convert("toRelativeHourNum")} % $nth, '$dateZone')"
case MultiDuration(nth, TimeUnit.Minute) =>
s"subtractMinutes(${convert("toStartOfMinute")}, ${convert("toRelativeMinuteNum")} % $nth, '$dateZone')"
case MultiDuration(nth, TimeUnit.Second) =>
s"subtractSeconds(${toDateTime(s"$column / 1000")}, ${convert("toRelativeSecondNum")} % $nth, '$dateZone')"
case unsupported => throw new IllegalArgumentException(s"Unsupported interval: $unsupported")
}
}
// TODO this is a fallback to find a similar timezone when the provided interval does not have a set timezone id.
// We should be able to disable this from the config and fail fast if we cannot determine the timezone for timeseries
// (probably default to failing)
private def determineZoneId(start: DateTime): String = {
val provider = DateTimeZone.getProvider
val zones = provider.getAvailableIDs.asScala.map(provider.getZone)
val zone = start.getZone
val targetZone = zones
.find(_.getID == zone.getID)
.orElse(
zones.find(targetZone =>
targetZone.getID.startsWith("Etc/") &&
targetZone.getOffset(start.getMillis) == start.getZone.getOffset(start.getMillis)
)
)
.getOrElse(throw new IllegalArgumentException(s"Could not determine the zone from source $zone"))
.getID
targetZone
}
// Table joins are tokenized as select * because of https://github.com/yandex/ClickHouse/issues/635
private def tokenizeJoin(select: Option[SelectQuery], from: Option[FromQuery], join: Option[JoinQuery])(implicit
ctx: TokenizeContext
): String =
join match {
case Some(query) =>
ctx.incrementJoinNumber()
// we always need to provide an alias to the RIGHT side
val right = query.other match {
case table: TableFromQuery[_] => s"(SELECT * ${tokenizeFrom(Some(table))})"
case query: InnerFromQuery => tokenizeFrom(Some(query), withPrefix = false)
}
val leftAlias = if (from.flatMap(_.alias).isEmpty) s"AS ${ctx.leftAlias(from.flatMap(_.alias))}" else ""
val rightAlias = s"AS ${ctx.rightAlias(query.other.alias)}"
s""" $leftAlias
| ${if (query.global) "GLOBAL " else ""}
| ${tokenizeJoinType(query.joinType)}
| $right $rightAlias
| ${tokenizeJoinKeys(select, from.get, query)}""".trim.stripMargin
.replaceAll("\n", "")
.replaceAll("\r", "")
case None => ""
}
private def tokenizeJoinKeys(select: Option[SelectQuery], from: FromQuery, query: JoinQuery)(implicit
ctx: TokenizeContext
): String = {
val using = query.using.filterNot {
case EmptyColumn => true
case _ => false
}
query.joinType match {
case CrossJoin =>
assert(`using`.isEmpty, "When using CrossJoin, no using columns should be provided")
assert(query.on.isEmpty, "When using CrossJoin, no on conditions should be provided")
""
case _ =>
assert(`using`.nonEmpty || query.on.nonEmpty, s"No USING or ON provided for joinType: ${query.joinType}")
assert(!(`using`.nonEmpty && query.on.nonEmpty), s"Both USING and ON provided for joinType: ${query.joinType}")
if (`using`.nonEmpty) {
// TOKENIZE USING
if (`using`.size == 1) s"USING ${using.head.name}"
else s"USING (${using.map(_.name).mkString(ctx.delimiter)})"
} else if (query.on.nonEmpty) {
// TOKENIZE ON. If the fromClause is a TABLE, we need to check on aliases!
"ON " + query.on
.map { cond =>
val left = verifyOnCondition(select, from, cond.left)
s"${ctx.leftAlias(from.alias)}.$left ${cond.operator} ${ctx.rightAlias(query.other.alias)}.${cond.right.name}"
}
.mkString(" AND ")
} else ""
}
}
private def verifyOnCondition(select: Option[SelectQuery], from: FromQuery, joinKey: Column): String =
from match {
case _: TableFromQuery[_] =>
// check if joinKey is an existing DB field or an alias!
select
.map(_.columns)
.getOrElse(Seq.empty)
.flatMap {
case x: AliasedColumn[_] => if (x.alias == joinKey.name) Option(x.original.name) else None
case _ => None
}
.headOption
.getOrElse(joinKey.name)
case _ => joinKey.name
}
private[language] def tokenizeColumns(columns: Seq[Column])(implicit ctx: TokenizeContext): String =
columns
.filterNot {
case EmptyColumn => true
case _ => false
}
.map(tokenizeColumn)
.mkString(", ")
/**
* https://clickhouse.tech/docs/en/sql-reference/statements/select/join/
*/
private def tokenizeJoinType(joinType: JoinQuery.JoinType): String =
joinType match {
// Standard SQL JOIN https://en.wikipedia.org/wiki/Join_(SQL)
case InnerJoin => "INNER JOIN"
case LeftOuterJoin => "LEFT OUTER JOIN"
case RightOuterJoin => "RIGHT OUTER JOIN"
case FullOuterJoin => "FULL OUTER JOIN"
case CrossJoin => "CROSS JOIN"
// custom clickhouse
case AllInnerJoin => "ALL INNER JOIN"
case AllLeftJoin => "ALL LEFT JOIN"
case AllRightJoin => "ALL RIGHT JOIN"
case AntiLeftJoin => "ANTI LEFT JOIN"
case AntiRightJoin => "ANTI RIGHT JOIN"
case AnyInnerJoin => "ANY INNER JOIN"
case AnyLeftJoin => "ANY LEFT JOIN"
case AnyRightJoin => "ANY RIGHT JOIN"
case AsOfJoin => "ASOF JOIN"
case AsOfLeftJoin => "ASOF LEFT JOIN"
case SemiLeftJoin => "SEMI LEFT JOIN"
case SemiRightJoin => "SEMI RIGHT JOIN"
}
private def tokenizeFiltering(maybeCondition: Option[TableColumn[Boolean]], keyword: String)(implicit
ctx: TokenizeContext
): String =
maybeCondition match {
case None => ""
case Some(condition) =>
// s"$keyword ${tokenizeColumn(condition)}"
s"$keyword ${removeSurroundingBrackets(tokenizeColumn(condition).trim)}"
}
private def tokenizeGroupBy(groupBy: Option[GroupByQuery]): String = {
val groupByColumns = groupBy match {
case Some(GroupByQuery(usingColumns, _, _)) if usingColumns.nonEmpty =>
Some(s"GROUP BY ${tokenizeColumnsAliased(usingColumns)}")
case _ =>
None
}
val groupByMode = groupBy match {
case Some(GroupByQuery(_, Some(mode), _)) =>
Some(mode match {
case GroupByQuery.WithRollup => "WITH ROLLUP"
case GroupByQuery.WithCube => "WITH CUBE"
})
case _ => None
}
val groupByWithTotals = groupBy match {
case Some(GroupByQuery(_, _, true)) => Some("WITH TOTALS")
case _ => None
}
(groupByColumns ++ groupByMode ++ groupByWithTotals).mkString(" ")
}
private def tokenizeOrderBy(orderBy: Seq[(Column, OrderingDirection)])(implicit ctx: TokenizeContext): String =
orderBy.toList match {
case Nil | null => ""
case _ => s"ORDER BY ${tokenizeTuplesAliased(orderBy)}"
}
private def tokenizeLimit(limit: Option[Limit]): String =
limit match {
case None => ""
case Some(Limit(size, offset)) => s"LIMIT $offset, $size"
}
private def tokenizeColumnsAliased(columns: Seq[Column]): String =
columns.map(aliasOrName).mkString(", ")
private def tokenizeTuplesAliased(columns: Seq[(Column, OrderingDirection)])(implicit ctx: TokenizeContext): String =
columns
.map { case (column, dir) =>
tokenizeColumn(column) + " " + direction(dir)
}
.mkString(", ")
private def aliasOrName(column: Column) =
column match {
case EmptyColumn => ""
case alias: AliasedColumn[_] => alias.quoted
case col: Column => col.quoted
}
private def direction(dir: OrderingDirection): String =
dir match {
case ASC => "ASC"
case DESC => "DESC"
}
}