All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.spark.sql.CassandraPushdownHandler.scala Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to Tuplejump Software Pvt. Ltd. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Tuplejump Software Pvt. Ltd. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.apache.spark.sql

import org.apache.spark.Logging
import org.apache.spark.sql.catalyst.expressions._

/**
 * Selects which filter expressions are handed to Cassandra and which are kept
 * for evaluation afterwards, based on the table's clustering keys and indexes.
 */
object CassandraPushdownHandler extends PushdownHandler {
  /**
   * Splits `filters` into the expressions to push down and the expressions to retain.
   *
   * A filter is pushed down when its column is one of:
   *  - a leading run of clustering keys that all have an equality filter,
   *  - the clustering key right after that run, if it has a range (gt/lt) filter,
   *  - an indexed column (partition keys excluded) with an equality filter,
   *  - an indexed column with a range filter, provided at least one indexed
   *    column has an equality filter.
   *
   * @param filters        candidate filter expressions
   * @param partitionKeys  partition key column names; removed from the usable indexes
   * @param clusteringKeys clustering key column names, in order
   * @param allIndexes     names of all indexed columns
   * @return the pushed-down filters and the remaining filters
   */
  private[sql] def getPushdownFilters(
      filters: Seq[Expression],
      partitionKeys: List[String],
      clusteringKeys: List[String],
      allIndexes: List[String]): PushdownFilters = {
    val indexes: List[String] = allIndexes diff partitionKeys

    // (column name, expression) for every recognised equality filter.
    val equals = filters.map(mapEqualsToColumnNames).flatten
    val equalFilters: Seq[String] = equals.map(_._1)

    // Clustering keys that have an equality filter, in clustering-key order.
    val equalsOnClusteringKeys: List[String] = clusteringKeys.intersect(equalFilters)
    val clusteringKeysToUse: List[String] = getClusterKeysToUse(clusteringKeys, equalsOnClusteringKeys)

    val equalIndexes: List[String] = equalFilters.intersect(indexes).toList

    // (column name, expression) for every recognised range filter.
    val gtlts = filters.map(mapGtLtToColumnNames).flatten
    val gtltFilters = gtlts.map(_._1)

    // Non-equality indexes are usable only if there is at least one equality index.
    val nonEqIndexes: List[String] =
      if (equalIndexes.isEmpty) List.empty[String] else gtltFilters.intersect(indexes).toList

    // A range filter on the clustering key immediately following the equality keys is usable too.
    val nextClusteringKey: Option[String] = clusteringKeys.lift(clusteringKeysToUse.length)
    val gtltClusteringKey: List[String] = nextClusteringKey.toList.filter(gtltFilters.contains)

    val possibleFilters = equals ++ gtlts

    val pushdownColumns =
      (clusteringKeysToUse ++ gtltClusteringKey ++ equalIndexes ++ nonEqIndexes).distinct
    val pushdownExpr = pushdownColumns
      .flatMap(col => possibleFilters.filter(_._1 == col))
      .map(_._2)

    val retainExpr = filters.toList diff pushdownExpr

    PushdownFilters(pushdownExpr, retainExpr)
  }

  /**
   * Returns the leading elements of `filteredClusteringKeys` that line up, position by
   * position, with `clusteringKeys` starting at `index`; stops at the first mismatch.
   *
   * Implemented with `zip`/`takeWhile` so it neither recurses nor indexes into a `List`,
   * and simply stops if `clusteringKeys` runs out instead of throwing.
   */
  private def getClusterKeysToUse(clusteringKeys: List[String], filteredClusteringKeys: List[String], index: Int = 0): List[String] =
    filteredClusteringKeys
      .zip(clusteringKeys.drop(index))
      .takeWhile { case (filtered, expected) => filtered == expected }
      .map(_._1)
}

/**
 * Result of splitting a set of filter expressions.
 *
 * @param filtersToPushdown expressions selected for pushdown
 * @param filtersToRetain   expressions that were not selected for pushdown
 */
case class PushdownFilters(
  filtersToPushdown: Seq[Expression],
  filtersToRetain: Seq[Expression]
)


/**
 * Pushdown handler that selects filters purely by their shape: simple
 * column-versus-literal predicates and And/Or/Not combinations of them.
 */
object StargatePushdownHandler extends PushdownHandler with Logging {
  /**
   * Partitions `filters` into supported expressions (pushed down) and the rest (retained).
   *
   * @param filters candidate filter expressions
   * @return the supported filters and the remaining filters
   */
  private[sql] def getPushdownFilters(filters: Seq[Expression]): PushdownFilters = {
    logInfo("Using STARGATE filter")
    val (pushdown, retain) = filters.partition(isSupportedQuery)

    logInfo(s"PUSHDOWN: $pushdown")

    PushdownFilters(pushdown, retain)
  }

  /** True when `expr` is a supported simple predicate or a supported combination of them. */
  private def isSupportedQuery(expr: Expression): Boolean =
    isSupportedSimpleQuery(expr) || isSupportedComplexQuery(expr)

  /** True for And/Or whose both operands are supported, and for Not whose operand is supported. */
  private def isSupportedComplexQuery(expr: Expression): Boolean = expr match {
    case Or(left, right) => isSupportedQuery(right) && isSupportedQuery(left)
    case And(left, right) => isSupportedQuery(right) && isSupportedQuery(left)
    case Not(child) => isSupportedQuery(child)
    case _ => false
  }

  /**
   * True for an equality or range comparison between a named expression (optionally
   * wrapped in a Cast) and a literal, and for IN over a named expression.
   */
  private def isSupportedSimpleQuery(expr: Expression): Boolean = expr match {
    case EqualTo(_: NamedExpression, _: Literal) => true
    case EqualTo(Cast(_: NamedExpression, _), _: Literal) => true
    case LessThan(_: NamedExpression, _: Literal) => true
    case LessThan(Cast(_: NamedExpression, _), _: Literal) => true
    case LessThanOrEqual(_: NamedExpression, _: Literal) => true
    case LessThanOrEqual(Cast(_: NamedExpression, _), _: Literal) => true
    case GreaterThan(_: NamedExpression, _: Literal) => true
    case GreaterThan(Cast(_: NamedExpression, _), _: Literal) => true
    case GreaterThanOrEqual(_: NamedExpression, _: Literal) => true
    case GreaterThanOrEqual(Cast(_: NamedExpression, _), _: Literal) => true
    // A `Seq[Literal]` type pattern is erased at runtime and would accept any element
    // type, so every element of the IN list is checked explicitly.
    case In(_: NamedExpression, values) =>
      values.forall {
        case _: Literal => true
        case _ => false
      }
    case _ => false
  }
}


/**
 * Helpers shared by pushdown handlers for recognising column-versus-literal
 * predicates and tagging them with the column name they refer to.
 */
private[sql] trait PushdownHandler {
  /**
   * Function mapping an equality between a named expression (bare or inside a Cast)
   * and a literal to `Some(columnName -> expression)`; anything else maps to `None`.
   */
  protected[sql] def mapEqualsToColumnNames: Expression => Option[(String, Expression)] = {
    case predicate @ EqualTo(operand, _: Literal) => keyByColumn(operand, predicate)
    case _ => None
  }

  /**
   * Function mapping a `<`, `<=`, `>` or `>=` comparison between a named expression
   * (bare or inside a Cast) and a literal to `Some(columnName -> expression)`;
   * anything else maps to `None`.
   */
  protected[sql] def mapGtLtToColumnNames: Expression => Option[(String, Expression)] = {
    case predicate @ LessThan(operand, _: Literal) => keyByColumn(operand, predicate)
    case predicate @ LessThanOrEqual(operand, _: Literal) => keyByColumn(operand, predicate)
    case predicate @ GreaterThan(operand, _: Literal) => keyByColumn(operand, predicate)
    case predicate @ GreaterThanOrEqual(operand, _: Literal) => keyByColumn(operand, predicate)
    case _ => None
  }

  /**
   * Pairs `predicate` with the name of `operand` when `operand` is a named expression
   * or a Cast directly wrapping one; yields `None` for any other operand.
   */
  private def keyByColumn(operand: Expression, predicate: Expression): Option[(String, Expression)] =
    operand match {
      case column: NamedExpression => Some(column.name -> predicate)
      case Cast(column: NamedExpression, _) => Some(column.name -> predicate)
      case _ => None
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy