All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.couchbase.spark.sql.N1QLRelation.scala Maven / Gradle / Ivy

/*
 * Copyright (c) 2015 Couchbase, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.couchbase.spark.sql

import com.couchbase.client.java.query.N1qlQuery
import com.couchbase.spark.connection.CouchbaseConfig
import com.couchbase.spark.rdd.QueryRDD
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.sources._

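/**
 * Implements the BaseRelation for N1QL queries.
 *
 * @param bucket the name of the bucket (if null, the first configured bucket is used)
 * @param userSchema the optional schema (if not provided it will be inferred)
 * @param parameters the data source options; the "idField" and "schemaFilter" keys are read
 * @param sqlContext the sql context.
 */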
class N1QLRelation(bucket: String, userSchema: Option[StructType], parameters: Map[String, String])
                  (@transient val sqlContext: SQLContext)
  extends BaseRelation
  with PrunedFilteredScan
  with Logging {

  private val cbConfig = CouchbaseConfig(sqlContext.sparkContext.getConf)

  // Fall back to the first configured bucket when no explicit bucket name is given.
  private val bucketName = Option(bucket).getOrElse(cbConfig.buckets.head.name)

  // Name of the column under which META(bucket).id is exposed.
  private val idFieldName = parameters.getOrElse("idField", DefaultSource.DEFAULT_DOCUMENT_ID_FIELD)

  // Optional raw N1QL predicate supplied through the "schemaFilter" option. Blank values are
  // treated as absent so that no dangling "WHERE" / "AND" is ever generated.
  private val schemaFilter: Option[String] =
    parameters.get("schemaFilter").map(_.trim).filter(_.nonEmpty)

  /**
   * The schema of this relation: the user supplied one if present, otherwise a schema inferred
   * by Spark's JSON reader from (at most) 1000 documents of the bucket, restricted by the
   * optional "schemaFilter" predicate.
   */
  override val schema: StructType = userSchema.getOrElse[StructType] {
    val queryFilter = schemaFilter.map("WHERE " + _).getOrElse("")

    val query = s"SELECT META(`$bucketName`).id as `$idFieldName`, `$bucketName`.* " +
      s"FROM `$bucketName` $queryFilter LIMIT 1000"

    logInfo(s"Inferring schema from bucket $bucketName with query '$query'")

    val inferred = sqlContext.read.json(
      QueryRDD(sqlContext.sparkContext, bucketName, N1qlQuery.simple(query)).map(_.value.toString)
    ).schema

    logInfo(s"Inferred schema is $inferred")

    inferred
  }

  /**
   * Builds and runs the N1QL query for the given projection and filters.
   *
   * @param requiredColumns the columns that must be present, in order, in every returned row
   * @param filters the filters Spark offers for push down; unsupported ones are skipped
   * @return one row per query result with the values of `requiredColumns`
   */
  override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
    val pushedDown = Option(buildFilter(filters).trim).filter(_.nonEmpty)

    // The user predicate is wrapped in parentheses so that an OR inside it cannot change the
    // meaning of the pushed down filters it is ANDed with.
    val userPredicate = schemaFilter.map(f => s"($f)")

    val predicates = pushedDown.toSeq ++ userPredicate.toSeq
    val whereClause =
      if (predicates.isEmpty) "" else predicates.mkString(" WHERE ", " AND ", "")

    val columns = buildColumns(requiredColumns, bucketName)
    val query = s"SELECT $columns FROM `$bucketName`$whereClause"

    logInfo(s"Executing generated query: '$query'")

    val df = sqlContext.read.json(
      QueryRDD(sqlContext.sparkContext, bucketName, N1qlQuery.simple(query)).map(_.value.toString)
    )

    // The schema of `df` is inferred from the returned documents only, so a required column
    // that none of them contains is missing from it. Resolve the positions once on the driver
    // and emit null for such columns instead of failing inside Row.fieldIndex.
    val available = df.schema.fieldNames
    val indices = requiredColumns.map(col => available.indexOf(col))

    df.map(row =>
      Row.fromSeq(indices.map(i => if (i >= 0) row.get(i) else null).toList)
    )
  }

  /**
   * Transforms the required columns into the field list for the select statement.
   *
   * @param requiredColumns the columns to transform.
   * @param bucketName the name of the bucket the columns are selected from.
   * @return the raw N1QL string
   */
  private def buildColumns(requiredColumns: Array[String], bucketName: String): String =
    if (requiredColumns.isEmpty) {
      s"`$bucketName`.*"
    } else {
      requiredColumns
        .map { column =>
          // The document id is not part of the document body and has to be read from META().
          if (column == idFieldName) {
            s"META(`$bucketName`).id as `$idFieldName`"
          } else {
            s"`$column`"
          }
        }
        .mkString(",")
    }

  /**
   * Transform the filters into a N1QL where clause (without the WHERE keyword).
   *
   * Filters that cannot be converted by [[N1QLRelation.filterToExpression]] are logged and
   * left out of the clause.
   *
   * @param filters the filters to transform
   * @return the transformed raw N1QL clause, empty if no filter could be converted
   */
  private def buildFilter(filters: Array[Filter]): String = {
    val encoded = filters.toSeq.flatMap { f =>
      val expression: Option[String] =
        try {
          Some(N1QLRelation.filterToExpression(f))
        } catch {
          case _: Exception =>
            logInfo("Ignoring unsupported filter: " + f)
            None
        }
      expression
    }

    // Every encoded expression starts with a blank, hence no trailing blank after AND.
    encoded.mkString(" AND")
  }

}

/**
 * Helpers that translate Spark SQL data source filters into raw N1QL expressions.
 */
object N1QLRelation {

  /**
   * Turns a filter into a N1QL expression.
   *
   * Filters not listed in the match below are not handled and make the match fail with a
   * `MatchError`.
   *
   * @param filter the filter to convert
   * @return the resulting expression (starting with a single blank)
   */
  def filterToExpression(filter: Filter): String = {
    filter match {
      case EqualTo(attr, value) => s" ${attrToFilter(attr)} = " + valueToFilter(value)
      case GreaterThan(attr, value) => s" ${attrToFilter(attr)} > " + valueToFilter(value)
      case GreaterThanOrEqual(attr, value) => s" ${attrToFilter(attr)} >= " + valueToFilter(value)
      case LessThan(attr, value) => s" ${attrToFilter(attr)} < " + valueToFilter(value)
      case LessThanOrEqual(attr, value) => s" ${attrToFilter(attr)} <= " + valueToFilter(value)
      case IsNull(attr) => s" ${attrToFilter(attr)} IS NULL"
      case IsNotNull(attr) => s" ${attrToFilter(attr)} IS NOT NULL"
      case StringContains(attr, value) =>
        // Go through valueToFilter so the needle is quoted and escaped like any other string.
        s" CONTAINS(${attrToFilter(attr)}, ${valueToFilter(value)})"
      case StringStartsWith(attr, value) =>
        s" ${attrToFilter(attr)} LIKE '" + likePattern(value) + "%'"
      case StringEndsWith(attr, value) =>
        s" ${attrToFilter(attr)} LIKE '%" + likePattern(value) + "'"
      case In(attr, values) =>
        val encoded = values.map(valueToFilter).mkString(",")
        // attrToFilter (instead of quoting attr as a whole) keeps nested paths like a.b working.
        s" ${attrToFilter(attr)} IN [$encoded]"
      case And(left, right) =>
        val l = filterToExpression(left)
        val r = filterToExpression(right)
        s" ($l AND $r)"
      case Or(left, right) =>
        val l = filterToExpression(left)
        val r = filterToExpression(right)
        s" ($l OR $r)"
      case Not(f) =>
        val v = filterToExpression(f)
        s" NOT ($v)"
    }
  }

  /**
   * Prefixes every `.` and `*` of the given value with a backslash.
   *
   * NOTE(review): the N1QL LIKE wildcards are `%` and `_`, which are not escaped here - confirm
   * whether values containing them should be matched literally.
   *
   * @param value the raw value used inside a LIKE pattern
   * @return the value with `.` and `*` backslash-escaped
   */
  def escapeForLike(value: String): String =
    value.replaceAll("\\.", "\\\\.").replaceAll("\\*", "\\\\*")

  /**
   * Renders a value as a N1QL literal.
   *
   * Strings are wrapped in single quotes with backslashes and single quotes escaped, so the
   * value can neither terminate the literal early nor be altered by escape processing. Every
   * other value is rendered through its `toString`.
   *
   * @param value the value to render
   * @return the N1QL literal
   */
  def valueToFilter(value: Any): String = value match {
    case v: String => "'" + v.replace("\\", "\\\\").replace("'", "''") + "'"
    case v => s"$v"
  }

  /**
   * Quotes every segment of a dotted attribute path with backticks.
   *
   * @param attr the attribute name, optionally a nested path separated by dots
   * @return the quoted path, e.g. `a`.`b` for a.b
   */
  def attrToFilter(attr: String): String = {
    attr.split('.').map(elem => s"`$elem`").mkString(".")
  }

  /** Escapes a value for a LIKE pattern and doubles single quotes so the literal stays closed. */
  private def likePattern(value: String): String =
    escapeForLike(value).replace("'", "''")

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy