// com.yahoo.maha.service.calcite.MahaCalciteSqlParser.scala

package com.yahoo.maha.service.calcite

import com.yahoo.maha.core._
import com.yahoo.maha.core.fact.PublicFact
import com.yahoo.maha.core.registry.Registry
import com.yahoo.maha.core.request.{ASC, DESC, Field, GroupByQuery, Order, PaginationConfig, ReportingRequest, SortBy, SyncRequest}
import com.yahoo.maha.service.MahaServiceConfig
import com.yahoo.maha.service.error.MahaCalciteSqlParserError
import grizzled.slf4j.Logging
import org.apache.calcite.sql._
import org.apache.calcite.sql.fun.SqlLikeOperator
import org.apache.calcite.sql.parser.{SqlParseException, SqlParser}
import org.apache.calcite.sql.validate.SqlConformanceEnum
import org.apache.commons.lang.StringUtils
import org.joda.time.{DateTime, DateTimeZone}

import scala.collection.mutable.ArrayBuffer

/**
 * Parses a SQL string into a [[MahaSqlNode]] (a SELECT or DESCRIBE request) for the given schema
 * and registry.
 */
trait MahaCalciteSqlParser {
  def parse(sql: String, schema: Schema, registryName: String): MahaSqlNode
}
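// A minimal usage sketch (illustrative only): the mahaServiceConfig value, the schema and the
// registry name below are assumed to come from the host application and are not defined here.
//
//   val sqlParser   = DefaultMahaCalciteSqlParser(mahaServiceConfig)
//   val mahaSqlNode = sqlParser.parse("SELECT * FROM student_performance", StudentSchema, "er")
//
// Depending on the statement, the result is a SelectSqlNode wrapping a ReportingRequest or a
// DescribeSqlNode naming the cube.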

case class DefaultMahaCalciteSqlParser(mahaServiceConfig: MahaServiceConfig) extends MahaCalciteSqlParser with Logging {

  lazy protected[this] val fromDate : String = DailyGrain.toFormattedString(DateTime.now(DateTimeZone.UTC).minusDays(7))
  lazy protected[this] val toDate : String = DailyGrain.toFormattedString(DateTime.now(DateTimeZone.UTC))

  val DEFAULT_DAY_FILTER : Filter = BetweenFilter("Day", fromDate, toDate)
  val DAY = "Day"
  val config = SqlParser.config().withConformance(SqlConformanceEnum.LENIENT)
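  // SqlConformanceEnum.LENIENT is a permissive conformance setting, so the parser accepts SQL that
  // stricter dialect settings would reject.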
  import scala.collection.JavaConverters._

  override def parse(sql: String, schema: Schema, registryName:String): MahaSqlNode = {
    require(!StringUtils.isEmpty(sql), MahaCalciteSqlParserError("SQL cannot be empty", sql))
    require(mahaServiceConfig.registry.contains(registryName), s"Failed to find the registry ${registryName} in the MahaServiceConfig")
    val registry: Registry = mahaServiceConfig.registry.get(registryName).get.registry
    val parser: SqlParser = SqlParser.create(sql, config)
    try {
      val topSqlNode: SqlNode = parser.parseQuery
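      // Collect ORDER BY items (if any) into Maha SortBy clauses; plain SELECTs yield an empty list.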
      val orderByList: IndexedSeq[SortBy] = {
        if (topSqlNode.isInstanceOf[SqlOrderBy]) {
          val orderBySql = topSqlNode.asInstanceOf[SqlOrderBy]
          orderBySql.orderList.asScala.map {
            case sqlOrderByBasic: SqlBasicCall =>
              require(sqlOrderByBasic.operands.size > 0, s"Missing field in ORDER BY clause ${sqlOrderByBasic}")
              val order: Order = {
                if (sqlOrderByBasic.getOperator != null && sqlOrderByBasic.getKind == SqlKind.DESCENDING) {
                  DESC
                } else ASC
              }
              SortBy(toLiteral(sqlOrderByBasic.operands(0)), order)
            case sqlString: SqlCharStringLiteral =>
              SortBy(toLiteral(sqlString), ASC)
            case other =>
              throw new IllegalArgumentException(s"Maha Calcite: order by case ${other} not supported")
          }.toIndexedSeq
        } else IndexedSeq.empty
      }
      val sqlNode: SqlNode = topSqlNode match {
        case sqlSelect: SqlSelect => sqlSelect
        case sqlOrderBy: SqlOrderBy => sqlOrderBy.query
        case sqlDescribeTable: SqlDescribeTable => sqlDescribeTable
        case e =>
          throw new IllegalArgumentException(s"Query type ${e.getKind} is not supported by Maha-Calcite")
      }

      // Pagination: OFFSET maps to the start index (si) and FETCH to the max rows per page (mr).
      val (startIndex, maxRow) = topSqlNode match {
        case sqlOrderBy: SqlOrderBy =>
          val si = sqlOrderBy.offset match {
            case sqlNumericLiteral: SqlNumericLiteral => sqlNumericLiteral.toValue.toInt
            case null => 0
            case e =>
              warn(s"Offset type ${e.getKind} is not supported for si by Maha-Calcite, setting to 0")
              0
          }
          val mr = sqlOrderBy.fetch match {
            case sqlNumericLiteral: SqlNumericLiteral => sqlNumericLiteral.toValue.toInt
            case null => -1
            case e =>
              warn(s"Fetch type ${e.getKind} is not supported for mr by Maha-Calcite, setting to -1")
              -1
          }
          (si, mr)
        case e =>
          warn(s"Query type ${e.getKind} is not supported for si/mr by Maha-Calcite, setting to 0/-1")
          (0, -1)
      }

      sqlNode match {
        case sqlSelect: SqlSelect =>
          val (fromTable, tableSchema): (String, Option[String]) = getFromTableAndTableSchema(sqlSelect.getFrom)
          val publicFact = getCube(fromTable, registry)
          require(publicFact.isDefined, MahaCalciteSqlParserError(s"Failed to find the cube $fromTable", sql))
          val columnAliasToColumnMap:Map[String, PublicColumn] = publicFact.get.getAllDomainColumnAliasToPublicColumnMap(registry)
          val selectFields = getSelectList(sqlSelect.getSelectList, publicFact.get, columnAliasToColumnMap)
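          // Split WHERE/HAVING into regular filters plus the special Day/Hour/Minute filters and numDays.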
          val (filterExpression, dayFilterOption, hourFilterOption, minuteFilterOption, numDays) = getFilterList(sqlSelect, publicFact.get)
          SelectSqlNode(
            new ReportingRequest(
              cube=publicFact.get.name,
              selectFields = selectFields,
              filterExpressions = filterExpression,
              requestType = SyncRequest,
              sortBy = orderByList,
              schema = schema,
              reportDisplayName = None,
              forceDimensionDriven = false,
              forceFactDriven = false,
              includeRowCount = false,
              dayFilter = dayFilterOption.getOrElse(DEFAULT_DAY_FILTER),
              hourFilter = hourFilterOption,
              minuteFilter = minuteFilterOption,
              numDays = numDays,
              curatorJsonConfigMap = Map.empty,
              additionalParameters = Map.empty,
              queryType = GroupByQuery,
              pagination = PaginationConfig(Map.empty),
              paginationStartIndex = startIndex,
              rowsPerPage = maxRow
            )
          )
        case sqlDescribeTable: SqlDescribeTable =>
          val (fromTable, tableSchema): (String, Option[String]) = getFromTableAndTableSchema(sqlDescribeTable.getTable)
          val cubeOption = getCube(fromTable, registry)
          require(cubeOption.isDefined, s"Failed to find the table $fromTable in the registry")
          DescribeSqlNode(cubeOption.get.name)
        case e =>
          throw new IllegalArgumentException(s"Query type ${e.getKind} is not supported by Maha-Calcite")
      }
    }
    catch {
      case e: SqlParseException =>
        val error = s"Calcite Error: Failed to parse SQL ${sql} ${e.getMessage}"
        logger.error(error, e)
        throw e
    }
  }

  /** Looks up the public fact (cube) registered under the given table name, if any. */
  def getCube(fromTable: String, registry: Registry): Option[PublicFact] = {
    registry.getFact(fromTable)
  }

  /**
   * Extracts the table name and optional schema prefix from a FROM clause node, unwrapping an AS
   * alias if present. Returns (table, Option(schema)), both lower-cased.
   */
  def getFromTableAndTableSchema(sqlNode: SqlNode): (String, Option[String]) = {
    val sqlFromNode = sqlNode match {
      case sqlBasicCall: SqlBasicCall =>
        sqlBasicCall.getOperator.kind match {
          case SqlKind.AS => sqlBasicCall.getOperands()(0)
          case _ => sqlNode
        }
      case _ => sqlNode
    }
    sqlFromNode match {
      case sqlIdentifier: SqlIdentifier =>
        val fromTableAndTableSchema = sqlIdentifier.names
        if (fromTableAndTableSchema.size == 1)
          (fromTableAndTableSchema.get(0).toLowerCase, None)
        else if (fromTableAndTableSchema.size == 2)
          (fromTableAndTableSchema.get(1).toLowerCase, Some(fromTableAndTableSchema.get(0).toLowerCase))
        else
          throw new IllegalArgumentException(s"Incorrect FROM clause. Expected FROM table or FROM schema.table")
      case _ =>
        throw new IllegalArgumentException(s"Incorrect FROM clause. Expected FROM table or FROM schema.table")
    }
  }

  /**
   * Converts the SELECT list into Maha [[Field]]s. A `*` expands to all public fact and dimension
   * columns of the cube; unsupported node types are logged and skipped.
   */
  def getSelectList(sqlNode: SqlNode, publicFact: PublicFact, columnAliasToColumnMap: Map[String, PublicColumn]): IndexedSeq[Field] = {
    sqlNode match {
      case sqlNodeList: SqlNodeList =>
        if (sqlNodeList.size() > 0) {
          var arrayBuffer: ArrayBuffer[Field] = ArrayBuffer.empty
          val iter = sqlNodeList.iterator()
          while (iter.hasNext) {
            val subNode = iter.next()
            subNode match {
              case sqlIdentifier: SqlIdentifier =>
                if (sqlIdentifier.isStar) {
                  val indexedSeq: IndexedSeq[Field]=
                    publicFact.factCols.map(publicFactCol => Field(publicFactCol.alias, None, None)).toIndexedSeq ++
                      publicFact.dimCols.map(publicDimCol => Field(publicDimCol.alias, None, None)).toIndexedSeq
                  return indexedSeq
                }
                else {
                  sqlIdentifier.names.asScala.foreach(
                    c => {
                      val publicCol: PublicColumn = getColumnFromPublicFact(publicFact, c, columnAliasToColumnMap)
                      arrayBuffer += Field(publicCol.alias, None, None)
                    }
                  )
                }
              case sqlCharStringLiteral: SqlCharStringLiteral =>
                val publicCol: PublicColumn = getColumnFromPublicFact(publicFact, toLiteral(sqlCharStringLiteral), columnAliasToColumnMap)
                arrayBuffer += Field(publicCol.alias, None, None)
              case sqlBasicCall: SqlBasicCall =>
                val publicCol: PublicColumn = getColumnFromPublicFact(publicFact, toLiteral(sqlBasicCall.operands(0)), columnAliasToColumnMap)
                arrayBuffer += Field(publicCol.alias, None, None)
              case other: AnyRef =>
                val errMsg = s"sqlNode type ${other.getClass.toString} in getSelectList is not yet supported"
                logger.error(errMsg)
            }
          }
          arrayBuffer.toIndexedSeq
        } else {
          IndexedSeq.empty
        }
      case e =>
        IndexedSeq.empty
    }
  }

  /** Resolves a column alias against the cube's alias-to-column map, failing fast if it is unknown. */
  def getColumnFromPublicFact(publicFact: PublicFact, alias: String, columnAliasToColumnMap: Map[String, PublicColumn]): PublicColumn = {
    require(columnAliasToColumnMap.contains(alias), s"Failed to find the column ${alias} in cube ${publicFact.name}")
    columnAliasToColumnMap(alias)
  }

  /**
   * Builds Maha filters from the WHERE and HAVING clauses and separates out the special
   * Day/Hour/Minute grain filters via validate().
   */
  def getFilterList(sqlNode: SqlNode, publicFact: PublicFact): (IndexedSeq[Filter], Option[Filter], Option[Filter], Option[Filter], Int) = {
    val sqlWhere = sqlNode.asInstanceOf[SqlSelect].getWhere
    val sqlHaving = sqlNode.asInstanceOf[SqlSelect].getHaving
    var filterList = ArrayBuffer.empty[Filter]

    sqlWhere match {
      case sqlBasicCall: SqlBasicCall =>
        filterList = filterList ++ constructFilters(sqlBasicCall)
      case null =>
        logger.error("Empty WHERE clause.")
        return (IndexedSeq.empty, None, None, None, 1)
      case others: AnyRef =>
        logger.error(s"sqlNode type ${sqlWhere.getKind} in getFilterList is not yet supported")
        return (IndexedSeq.empty, None, None, None, 1)
    }

    sqlHaving match {
      case sqlBasicCall: SqlBasicCall =>
        filterList = filterList ++ constructFilters(sqlBasicCall)
      case null =>
        logger.debug("Empty HAVING clause.")
      case others: AnyRef =>
        logger.error(s"sqlNode type ${sqlHaving.getKind} in getFilterList is not yet supported")
    }
    validate(filterList)
  }

  /** Recursively converts a WHERE/HAVING expression tree into the corresponding Maha filters. */
  def constructFilters(sqlNode: SqlNode): ArrayBuffer[Filter] = {
    require(sqlNode.isInstanceOf[SqlBasicCall], s"type ${sqlNode.getKind} not supported in constructFilters")
    val sqlBasicCall: SqlBasicCall = sqlNode.asInstanceOf[SqlBasicCall]
    val operands = sqlBasicCall.getOperands
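    // Map each supported SQL operator kind to the corresponding Maha filter; AND/OR recurse into operands.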
    sqlBasicCall.getOperator.kind match {
      case SqlKind.AND =>
        constructFilters(operands(0)) ++ constructFilters(operands(1))
      case SqlKind.NOT_IN =>
        val notInList: List[String] = operands(1).asInstanceOf[SqlNodeList].toArray.toList.map(sqlNode => toLiteral(sqlNode))
        ArrayBuffer.empty += NotInFilter(toLiteral(operands(0)), notInList).asInstanceOf[Filter]
      case SqlKind.OR =>
        val mergeBuffer: ArrayBuffer[Filter] = constructFilters(operands(0)) ++ constructFilters(operands(1))
        ArrayBuffer.empty += OrFilter(mergeBuffer.toList).asInstanceOf[Filter]
      case SqlKind.EQUALS =>
        ArrayBuffer.empty += EqualityFilter(toLiteral(operands(0)), toLiteral(operands(1))).asInstanceOf[Filter]
      case SqlKind.NOT_EQUALS =>
        ArrayBuffer.empty += NotEqualToFilter(toLiteral(operands(0)), toLiteral(operands(1))).asInstanceOf[Filter]
      case SqlKind.GREATER_THAN =>
        ArrayBuffer.empty += GreaterThanFilter(toLiteral(operands(0)), toLiteral(operands(1))).asInstanceOf[Filter]
      case SqlKind.IN =>
        val inList: List[String] = operands(1).asInstanceOf[SqlNodeList].toArray.toList.map(sqlNode => toLiteral(sqlNode))
        ArrayBuffer.empty += InFilter(toLiteral(operands(0)), inList).asInstanceOf[Filter]
      case SqlKind.BETWEEN =>
        ArrayBuffer.empty += BetweenFilter(toLiteral(operands(0)), toLiteral(operands(1)), toLiteral(operands(2))).asInstanceOf[Filter]
      case SqlKind.LIKE =>
        if (sqlBasicCall.getOperator.asInstanceOf[SqlLikeOperator].isNegated)
          ArrayBuffer.empty += NotLikeFilter(toLiteral(operands(0)), toLiteral(operands(1))).asInstanceOf[Filter]
        else
          ArrayBuffer.empty += LikeFilter(toLiteral(operands(0)), toLiteral(operands(1))).asInstanceOf[Filter]
      case SqlKind.IS_NULL =>
        ArrayBuffer.empty += IsNullFilter(toLiteral(operands(0))).asInstanceOf[Filter]
      case SqlKind.LESS_THAN =>
        ArrayBuffer.empty += LessThanFilter(toLiteral(operands(0)), toLiteral(operands(1))).asInstanceOf[Filter]
      case other =>
        throw new IllegalArgumentException(s"Filter operator ${other} is not supported by Maha-Calcite")
    }
  }

  /** Renders a SqlNode as a plain string, stripping any surrounding single or double quotes. */
  def toLiteral(sqlNode: SqlNode): String = {
    if (sqlNode != null) sqlNode.toString.replaceAll("^[\"']+|[\"']+$", "")
    else ""
  }

  /**
   * Separates the grain filters (Day/Hour/Minute) from attribute and metric filters, applies the
   * default Day filter when none was supplied, and derives the number of days requested.
   */
  def validate(arrayBuffer: ArrayBuffer[Filter]): (IndexedSeq[Filter], Option[Filter], Option[Filter], Option[Filter], Int) = {
    val attributeAndMetricFilters = ArrayBuffer.empty[Filter]
    var dayFilter: Option[Filter] = None
    var hourFilter: Option[Filter] = None
    var minuteFilter: Option[Filter] = None
    var numDays = 1

    arrayBuffer.foreach {
      filter =>
        if (filter.field == DailyGrain.DAY_FILTER_FIELD) {
          dayFilter = Option(filter)
        } else if (filter.field == HourlyGrain.HOUR_FILTER_FIELD) {
          hourFilter = Option(filter)
        } else if (filter.field == MinuteGrain.MINUTE_FILTER_FIELD) {
          minuteFilter = Option(filter)
        } else {
          attributeAndMetricFilters += filter
        }
    }

    if (dayFilter.isEmpty)
      dayFilter = Option(DEFAULT_DAY_FILTER)

    // validate the day filter and derive numDays
    require(dayFilter.isDefined, "Day filter not found in list of filters!")
    dayFilter.map(DailyGrain.validateFilterAndGetNumDays).foreach(nd => numDays = nd)

    (attributeAndMetricFilters.toIndexedSeq, dayFilter, hourFilter, minuteFilter, numDays)
  }
}



