com.couchbase.spark.sql.N1QLRelation.scala Maven / Gradle / Ivy
* Copyright (c) 2015 Couchbase, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package com.couchbase.spark.sql
import com.couchbase.spark.connection.CouchbaseConfig
import com.couchbase.spark.rdd.QueryRDD
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.sources._
* Implements the BaseRelation for N1QL queries.
* @param bucket the name of the bucket
* @param userSchema the optional schema (if not provided it will be inferred)
* @param parameters the relation options; the "idField" and "schemaFilter" keys are read here
* @param sqlContext the sql context.
class N1QLRelation(bucket: String, userSchema: Option[StructType], parameters: Map[String, String])
(@transient val sqlContext: SQLContext)
extends BaseRelation
with PrunedFilteredScan
with Logging {
// CouchbaseConfig built from the SparkConf of the SparkContext behind sqlContext.
private val cbConfig = CouchbaseConfig(sqlContext.sparkContext.getConf)
// Bucket to query. Option(bucket) maps a null bucket name to None so a fallback applies.
// NOTE(review): the fallback expression after `getOrElse(` is missing from this copy of the
// file (the line is truncated) — restore it from the upstream source before compiling.
private val bucketName = Option(bucket).getOrElse(
// Name under which the document id is exposed in query results: the "idField" option,
// or DefaultSource.DEFAULT_DOCUMENT_ID_FIELD when that option is absent.
private val idFieldName = parameters.getOrElse("idField", DefaultSource.DEFAULT_DOCUMENT_ID_FIELD)
// Schema of this relation: the caller-supplied schema when present, otherwise a schema
// derived from a sample query against the bucket.
// NOTE(review): this copy of the file appears to have lost lines inside this block (closing
// braces, the value of the else branch, the step that turns the query result into a
// StructType, and the final result expression). Restore them from the upstream source; the
// comments below describe only what is visible here.
override val schema = userSchema.getOrElse[StructType] {
// An optional "schemaFilter" option becomes the WHERE clause of the sample query.
val queryFilter = if (parameters.get("schemaFilter").isDefined) {
"WHERE " + parameters.get("schemaFilter").get
} else {
// Sample query: the document id (exposed under the configured id field name) plus every
// field of the bucket's documents, capped at 1000 rows.
val query = s"SELECT META(`$bucketName`).id as `$idFieldName`, `$bucketName`.* " +
s"FROM `$bucketName` $queryFilter LIMIT 1000"
logInfo(s"Inferring schema from bucket $bucketName with query '$query'")
val schema =
// Runs the query through QueryRDD and renders each result's value as a string.
QueryRDD(sqlContext.sparkContext, bucketName, N1qlQuery.simple(query)).map(_.value.toString)
logInfo(s"Inferred schema is $schema")
// Builds and runs the N1QL query for a scan restricted to the required columns and the
// pushed-down filters, and maps the result to Rows.
// NOTE(review): this copy of the file appears to have lost lines inside this block (closing
// braces and parts of the final expression, see the last three lines). Restore them from
// the upstream source; the comments describe only what is visible here.
override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
// Condition generated from the pushed-down filters; empty when there is none.
var stringFilter = buildFilter(filters)
// The optional "schemaFilter" option is combined with the generated condition via AND.
// NOTE(review): the option text is appended without surrounding parentheses, so a
// schemaFilter containing OR may bind differently than intended — consider wrapping it.
if (parameters.get("schemaFilter").isDefined) {
if (!stringFilter.isEmpty) {
stringFilter = stringFilter + " AND "
stringFilter += parameters.get("schemaFilter").get
// The WHERE keyword is only emitted when there is a condition at all.
if (!stringFilter.isEmpty) {
stringFilter = " WHERE " + stringFilter
val query = "SELECT " + buildColumns(requiredColumns, bucketName) + " FROM `" +
bucketName + "`" + stringFilter
logInfo(s"Executing generated query: '$query'")
// Runs the query through QueryRDD and renders each result's value as a string.
QueryRDD(sqlContext.sparkContext, bucketName, N1qlQuery.simple(query)).map(_.value.toString)
// Each result row is turned into a Row by looking up values via row.fieldIndex(col).
// NOTE(review): the expression that supplies `col` is missing in this copy.
).map(row =>
Row.fromSeq( => row.get(row.fieldIndex(col))).toList)
* Transforms the required columns into the field list for the select statement.
* @param requiredColumns the columns to transform.
* @return the raw N1QL string
// Builds the select list of the generated query.
// NOTE(review): the second parameter is spelled `bucktName` and is never referenced; the
// body reads the class field `bucketName` instead.
// NOTE(review): this copy of the file appears to have lost lines inside this block (closing
// braces, the receiver of `.map` and whatever joins the mapped columns into one string).
private def buildColumns(requiredColumns: Array[String], bucktName: String): String = {
// No explicit columns requested: select every field of the bucket's documents.
if (requiredColumns.isEmpty) {
return s"`$bucketName`.*"
.map(column => {
// The id field is not selected by name but taken from the document's META().id and
// aliased to the configured id field name.
if (column == idFieldName) {
s"META(`$bucketName`).id as `$idFieldName`"
} else {
// Any other column is wrapped in backticks.
"`" + column + "`"
/**
 * Transforms the pushed-down filters into the condition of a N1QL WHERE clause.
 *
 * Each filter is converted with `N1QLRelation.filterToExpression` and the resulting
 * expressions are joined with `AND`. A filter whose conversion throws is logged and
 * left out of the condition.
 *
 * @param filters the filters to transform
 * @return the raw N1QL condition without the `WHERE` keyword, or an empty string when
 *         there is no filter or none could be converted
 */
private def buildFilter(filters: Array[Filter]): String = {
  // Best effort: one filter that cannot be converted must not keep the remaining
  // filters from being pushed down.
  val expressions = filters.toSeq.flatMap { f =>
    try {
      List(N1QLRelation.filterToExpression(f))
    } catch {
      case scala.util.control.NonFatal(_) =>
        logInfo("Ignoring unsupported filter: " + f)
        Nil
    }
  }
  // The expressions produced by filterToExpression start with a blank, so " AND"
  // (without a trailing blank) is sufficient as the separator.
  expressions.mkString(" AND")
}
object N1QLRelation {
/**
 * Turns a filter into a N1QL expression.
 *
 * Compound filters (`And`, `Or`, `Not`) are converted recursively. Attribute names are
 * quoted through `attrToFilter` and values rendered through `valueToFilter` so every
 * branch quotes consistently. Every returned expression starts with a single blank.
 *
 * @param filter the filter to convert
 * @return the resulting expression
 * @throws scala.MatchError if the filter is of a type not handled below
 */
def filterToExpression(filter: Filter): String = {
  filter match {
    case EqualTo(attr, value) => s" ${attrToFilter(attr)} = ${valueToFilter(value)}"
    case GreaterThan(attr, value) => s" ${attrToFilter(attr)} > ${valueToFilter(value)}"
    case GreaterThanOrEqual(attr, value) => s" ${attrToFilter(attr)} >= ${valueToFilter(value)}"
    case LessThan(attr, value) => s" ${attrToFilter(attr)} < ${valueToFilter(value)}"
    case LessThanOrEqual(attr, value) => s" ${attrToFilter(attr)} <= ${valueToFilter(value)}"
    case IsNull(attr) => s" ${attrToFilter(attr)} IS NULL"
    case IsNotNull(attr) => s" ${attrToFilter(attr)} IS NOT NULL"
    // The search string goes through valueToFilter like every other literal instead of
    // being interpolated between quotes directly.
    case StringContains(attr, value) =>
      s" CONTAINS(${attrToFilter(attr)}, ${valueToFilter(value)})"
    case StringStartsWith(attr, value) =>
      s" ${attrToFilter(attr)} LIKE '" + escapeForLike(value) + "%'"
    case StringEndsWith(attr, value) =>
      s" ${attrToFilter(attr)} LIKE '%" + escapeForLike(value) + "'"
    case In(attr, values) =>
      val encoded = values.map(valueToFilter).mkString(",")
      // attrToFilter keeps the quoting of (possibly dotted) attribute paths in line with
      // the comparison branches above.
      s" ${attrToFilter(attr)} IN [$encoded]"
    case And(left, right) =>
      val l = filterToExpression(left)
      val r = filterToExpression(right)
      s" ($l AND $r)"
    case Or(left, right) =>
      val l = filterToExpression(left)
      val r = filterToExpression(right)
      s" ($l OR $r)"
    case Not(f) =>
      val v = filterToExpression(f)
      s" NOT ($v)"
  }
}
/**
 * Prefixes every `.` and `*` in the value with a backslash before the value is embedded
 * in a LIKE pattern.
 *
 * NOTE(review): `%` and `_` are passed through unchanged — confirm whether they need
 * escaping for LIKE as well.
 *
 * @param value the raw string to embed
 * @return the value with each `.` and `*` preceded by a backslash
 */
def escapeForLike(value: String): String = {
  // Literal (non-regex) replacements; dots first, then asterisks.
  val dotsEscaped = value.replace(".", "\\.")
  dotsEscaped.replace("*", "\\*")
}
/**
 * Renders a filter value as a N1QL literal.
 *
 * Strings are wrapped in single quotes, with every embedded single quote doubled so the
 * value cannot end the literal early. Any other value is rendered through its string
 * form.
 *
 * NOTE(review): backslashes are passed through unchanged — confirm how N1QL string
 * literals treat them.
 *
 * @param value the value to render
 * @return the literal as it is placed into the generated query
 */
def valueToFilter(value: Any): String = value match {
  case v: String => "'" + v.replace("'", "''") + "'"
  case v => s"$v"
}
/**
 * Quotes an attribute path for use in a N1QL expression.
 *
 * The path is split on `.` and every segment is wrapped in backticks, e.g.
 * `address.city` becomes `` `address`.`city` ``.
 *
 * @param attr the attribute name, optionally dotted
 * @return the path with each segment backtick-quoted
 */
def attrToFilter(attr: String): String = {
  val quotedSegments = for (segment <- attr.split('.')) yield "`" + segment + "`"
  quotedSegments.mkString(".")
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy