All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.ignite.spark.impl.optimization.package.scala Maven / Gradle / Ivy

Go to download

Java-based middleware for in-memory processing of big data in a distributed environment.

There is a newer version: 2.13.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite.spark.impl


import org.apache.ignite.IgniteException
import org.apache.ignite.spark.impl.optimization.accumulator.{QueryAccumulator, SingleTableSQLAccumulator}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Count}
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, ExprId, Expression, NamedExpression}
import org.apache.spark.sql.types._

import scala.annotation.tailrec

/**
  */
package object optimization {
    /**
      * Constant to store alias in column metadata.
      */
    private[optimization] val ALIAS: String = "alias"

    /**
      * All `SupportedExpression` implementations.
      */
    private val SUPPORTED_EXPRESSIONS: List[SupportedExpressions] = List (
        SimpleExpressions,
        SystemExpressions,
        AggregateExpressions,
        ConditionExpressions,
        DateExpressions,
        MathExpressions,
        StringExpressions
    )

    /**
      * @param expr Expression.
      * @param useQualifier If true outputs attributes of `expr` with qualifier.
      * @param useAlias If true outputs `expr` with alias.
      * @return String representation of expression.
      */
    def exprToString(expr: Expression, useQualifier: Boolean = false, useAlias: Boolean = true): String = {
        @tailrec
        def exprToString0(expr: Expression, supportedExpressions: List[SupportedExpressions]): Option[String] =
            if (supportedExpressions.nonEmpty) {
                val exprStr = supportedExpressions.head.toString(
                    expr,
                    exprToString(_, useQualifier, useAlias = false),
                    useQualifier,
                    useAlias)

                exprStr match {
                    case res: Some[String] ⇒
                        res
                    case None ⇒
                        exprToString0(expr, supportedExpressions.tail)
                }
            }
            else
                None

        exprToString0(expr, SUPPORTED_EXPRESSIONS) match {
            case Some(str) ⇒ str

            case None ⇒
                throw new IgniteException("Unsupporte expression " + expr)
        }
    }

    /**
      * @param exprs Expressions to check.
      * @return True if `exprs` contains only allowed(i.e. can be pushed down to Ignite) expressions false otherwise.
      */
    def exprsAllowed(exprs: Seq[Expression]): Boolean =
        exprs.forall(exprsAllowed)

    /**
      * @param expr Expression to check.
      * @return True if `expr` allowed(i.e. can be pushed down to Ignite) false otherwise.
      *
      */
    def exprsAllowed(expr: Expression): Boolean =
        SUPPORTED_EXPRESSIONS.exists(_(expr, exprsAllowed))

    /**
      * Converts `input` into `AttributeReference`.
      *
      * @param input Expression to convert.
      * @param existingOutput Existing output.
      * @param exprId Optional expression ID to use.
      * @param alias Optional alias for a result.
      * @return Converted expression.
      */
    def toAttributeReference(input: Expression, existingOutput: Seq[NamedExpression], exprId: Option[ExprId] = None,
        alias: Option[String] = None): AttributeReference = {

        input match {
            case attr: AttributeReference ⇒
                val toCopy = existingOutput.find(_.exprId == attr.exprId).getOrElse(attr)

                AttributeReference(
                    name = toCopy.name,
                    dataType = toCopy.dataType,
                    metadata = alias
                        .map(new MetadataBuilder().withMetadata(toCopy.metadata).putString(ALIAS, _).build())
                        .getOrElse(toCopy.metadata)
                )(exprId = exprId.getOrElse(toCopy.exprId), qualifier = toCopy.qualifier)

            case a: Alias ⇒
                toAttributeReference(a.child, existingOutput, Some(a.exprId), Some(alias.getOrElse(a.name)))

            case agg: AggregateExpression ⇒
                agg.aggregateFunction match {
                    case c: Count ⇒
                        if (agg.isDistinct)
                            AttributeReference(
                                name = s"COUNT(DISTINCT ${c.children.map(exprToString(_)).mkString(" ")})",
                                dataType = LongType,
                                metadata = alias
                                    .map(new MetadataBuilder().putString(ALIAS, _).build())
                                    .getOrElse(Metadata.empty)
                            )(exprId = exprId.getOrElse(agg.resultId))
                        else
                            AttributeReference(
                                name = s"COUNT(${c.children.map(exprToString(_)).mkString(" ")})",
                                dataType = LongType,
                                metadata = alias
                                    .map(new MetadataBuilder().putString(ALIAS, _).build())
                                    .getOrElse(Metadata.empty)
                            )(exprId = exprId.getOrElse(agg.resultId))

                    case _ ⇒
                        toAttributeReference(agg.aggregateFunction, existingOutput, Some(exprId.getOrElse(agg.resultId)), alias)
                }

            case ne: NamedExpression ⇒
                AttributeReference(
                    name = exprToString(input),
                    dataType = input.dataType,
                    metadata = alias
                        .map(new MetadataBuilder().withMetadata(ne.metadata).putString(ALIAS, _).build())
                        .getOrElse(Metadata.empty)
                )(exprId = exprId.getOrElse(ne.exprId))

            case _ if exprsAllowed(input) ⇒
                AttributeReference(
                    name = exprToString(input),
                    dataType = input.dataType,
                    metadata = alias
                        .map(new MetadataBuilder().putString(ALIAS, _).build())
                        .getOrElse(Metadata.empty)
                )(exprId = exprId.getOrElse(NamedExpression.newExprId))

            case _  ⇒
                throw new IgniteException(s"Unsupported column expression $input")
        }
    }

    /**
      * @param dataType Spark data type.
      * @return SQL data type.
      */
    def toSqlType(dataType: DataType): String = dataType match {
        case BooleanType ⇒ "BOOLEAN"
        case IntegerType ⇒ "INT"
        case ByteType ⇒ "TINYINT"
        case ShortType ⇒ "SMALLINT"
        case LongType ⇒ "BIGINT"
        case DecimalType() ⇒ "DECIMAL"
        case DoubleType ⇒ "DOUBLE"
        case FloatType ⇒ "REAL"
        case DateType ⇒ "DATE"
        case TimestampType ⇒ "TIMESTAMP"
        case StringType ⇒ "VARCHAR"
        case BinaryType ⇒ "BINARY"
        case ArrayType(_, _) ⇒ "ARRAY"
        case _ ⇒
            throw new IgniteException(s"$dataType not supported!")
    }

    /**
      * @param expr Expression
      * @return True if expression or some of it children is AggregateExpression, false otherwise.
      */
    def hasAggregateInside(expr: Expression): Boolean = {
        def hasAggregateInside0(expr: Expression): Boolean = expr match {
            case AggregateExpression(_, _, _, _) ⇒
                true

            case e: Expression ⇒
                e.children.exists(hasAggregateInside0)
        }

        hasAggregateInside0(expr)
    }

    /**
      * Check if `acc` representing simple query.
      * Simple is `SELECT ... FROM table WHERE ... ` like query.
      * Without aggregation, limits, order, embedded select expressions.
      *
      * @param acc Accumulator to check.
      * @return True if accumulator stores simple query info, false otherwise.
      */
    def isSimpleTableAcc(acc: QueryAccumulator): Boolean = acc match {
        case acc: SingleTableSQLAccumulator if acc.table.isDefined ⇒
            acc.groupBy.isEmpty &&
                acc.localLimit.isEmpty &&
                acc.orderBy.isEmpty &&
                !acc.distinct &&
                !acc.outputExpressions.exists(hasAggregateInside)

        case _ ⇒
            false
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy