All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sequoiadb.spark.io.SequoiadbReader.scala Maven / Gradle / Ivy

/*
 *  Licensed to SequoiaDB (C) under one or more contributor license agreements.
 *  See the NOTICE file distributed with this work for additional information
 *  regarding copyright ownership. The SequoiaDB (C) licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License. You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied. See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package com.sequoiadb.spark.io

/**
 * Source File Name = SequoiadbReader.scala
 * Description      = Reader class for Sequoiadb data source
 * Restrictions     = N/A
 * Change Activity:
 * Date     Who                Description
 * ======== ================== ================================================
 * 20150307 Tao Wang           Initial Draft
 */
import com.sequoiadb.spark.SequoiadbConfig
import com.sequoiadb.spark.partitioner.SequoiadbPartition
import com.sequoiadb.spark.SequoiadbException
import com.sequoiadb.spark.util.ConnectionUtil
import org.apache.spark.Partition
import org.apache.spark.sql.sources._
import org.bson.BSONObject
import org.bson.BasicBSONObject
import org.bson.types.BasicBSONList
import com.sequoiadb.base.DBCursor
import com.sequoiadb.base.SequoiadbDatasource
import com.sequoiadb.base.Sequoiadb
import scala.collection.JavaConversions._

/**
 *
 * @param config Configuration object.
 * @param requiredColumns Pruning fields
 * @param filters Added query filters
 */
class SequoiadbReader(
  config: SequoiadbConfig,
  requiredColumns: Array[String],
  filters: Array[Filter]) {

  private var dbConnectionPool : Option[SequoiadbDatasource] = None
  private var dbConnection : Option[Sequoiadb] = None
  private var dbCursor : Option[DBCursor] = None
  def close(): Unit = {
    dbCursor.fold(ifEmpty=()) { cursor=>
      cursor.close()
      dbCursor = None
    }
    dbConnectionPool.fold(ifEmpty=()) { connectionpool =>
      dbConnection.fold(ifEmpty=()) { connection =>
        connectionpool.close(connection)
      }
      connectionpool.close
    }
  }

  def hasNext: Boolean = {
    dbCursor.fold(ifEmpty=false){_.hasNext()}
  }

  def next(): BSONObject = {
    dbCursor.fold(ifEmpty=
      throw new IllegalStateException("dbCursor is not initialized"))(_.getNext)
  }


  /**
   * Initialize SequoiaDB reader
   * @param partition Where to read from
   */
  def init(partition: Partition): Unit = {
    // convert from Spark Partition to SequoiadbPartition
    val sdbPartition = partition.asInstanceOf[SequoiadbPartition]
    try {
      // TODO: Need to handle PreferedInstance in config.preference
      // For now let's simply turn the selection to sdbdatasource
      dbConnectionPool = Option ( new SequoiadbDatasource (
          // use the hosts for the given partition
          sdbPartition.hosts.map{it=>it.toString},
          config[String](SequoiadbConfig.Username),
          config[String](SequoiadbConfig.Password),
          ConnectionUtil.initConfigOptions,
          ConnectionUtil.initSequoiadbOptions ) )
      // pickup a connection
      val connection = dbConnectionPool.get.getConnection
      // get collection space
      val cs = connection.getCollectionSpace(sdbPartition.collection.collectionspace)
      // get collection
      val cl = cs.getCollection(sdbPartition.collection.collection)
      // perform query
      dbCursor = Option(cl.query (
          SequoiadbReader.queryPartition(filters),
          SequoiadbReader.selectFields(requiredColumns),
          null,
          null))
    }
    catch {
      case ex: Exception =>
        throw SequoiadbException(ex.getMessage, ex)
    }
  }
}

object SequoiadbReader {
  /**
   * Create query partition using given filters.
   *
   * @param filters the Spark filters to be converted to SequoiaDB filters
   * @return the query object
   */
  def queryPartition ( filters: Array[Filter]): BSONObject = {
    val obj : BSONObject = new BasicBSONObject
    filters.foreach {
      case EqualTo(attribute, value) => {
        val subobj : BSONObject = new BasicBSONObject
        subobj.put("$et", value)
        obj.put(attribute,subobj)
      }
      case GreaterThan(attribute, value) => {
        val subobj : BSONObject = new BasicBSONObject
        subobj.put("$gt", value)
        obj.put(attribute,subobj)
      }
      case GreaterThanOrEqual(attribute, value) => {
        val subobj : BSONObject = new BasicBSONObject
        subobj.put("$gte", value)
        obj.put(attribute,subobj)
      }
      case In(attribute, values) => {
        val subobj : BSONObject = new BasicBSONObject
        val arr : BSONObject = new BasicBSONList
        Array.tabulate(values.length){ i => arr.put(""+i, values(i))}
        subobj.put("$in", arr)
        obj.put(attribute,subobj)
      }
      case LessThan(attribute, value) => {
        val subobj : BSONObject = new BasicBSONObject
        subobj.put("$lt", value)
        obj.put(attribute,subobj)
      }
      case LessThanOrEqual(attribute, value) => {
        val subobj : BSONObject = new BasicBSONObject
        subobj.put("$lte", value)
        obj.put(attribute,subobj)
      }
      case IsNull(attribute) => {
        val subobj : BSONObject = new BasicBSONObject
        subobj.put("$isnull", 1)
        obj.put(attribute,subobj)
      }
      case IsNotNull(attribute) => {
        val subobj : BSONObject = new BasicBSONObject
        subobj.put("$isnull", 0)
        obj.put(attribute,subobj)
      }
      case And(left, right) => {
        val leftCond : BSONObject = queryPartition ( Array(left) )
        val rightCond : BSONObject = queryPartition ( Array(right) )
        val arr : BSONObject = new BasicBSONList
        arr.put ( "0", leftCond )
        arr.put ( "1", rightCond )
        obj.put ( "$and",arr )
      }
      case Or(left, right) => {
        val leftCond : BSONObject = queryPartition ( Array(left) )
        val rightCond : BSONObject = queryPartition ( Array(right) )
        val arr : BSONObject = new BasicBSONList
        arr.put ( "0", leftCond )
        arr.put ( "1", rightCond )
        obj.put ( "$or",arr )
      }
      case Not(child) =>{
        val notCond : BSONObject = queryPartition ( Array(child) )
        val arr : BSONObject = new BasicBSONList
        arr.put ( "0", notCond )
        obj.put ( "$not",arr )
      }
    }
    obj
  }
  /**
   *
   * Prepared BSONObject used to specify required fields in sequoiadb 'query'
   * @param fields Required fields
   * @return A sequoiadb object that represents selector.
   */
  def selectFields(fields: Array[String]): BSONObject = {
    val res : BSONObject = new BasicBSONObject
    fields.map { res.put ( _, null ) }
    res
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy