/*
* Licensed to SequoiaDB (C) under one or more contributor license agreements.
* See the NOTICE file distributed with this work for additional information
* regarding copyright ownership. The SequoiaDB (C) licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Licensed to STRATIO (C) under one or more contributor license agreements.
* See the NOTICE file distributed with this work for additional information
* regarding copyright ownership. The STRATIO (C) licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.sequoiadb.spark.schema
/**
* Source File Name = SequoiadbRowConverter.scala
* Description = Record conversion between BSONObject and schema RDD
* Restrictions = N/A
* Change Activity:
* Date       Who         Description
* ========   =========   ================================================
* 20150306   Tao Wang    Modified from original
*/
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.types._
import org.apache.spark.sql._
import org.bson.BSONObject
import org.bson.BasicBSONObject
import org.bson.types.BasicBSONList
import java.sql.Timestamp
import org.bson.types.BSONTimestamp
import org.bson.types.Binary
import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer
/**
* SequoiadbRowConverter supports RDD transformations
* from BSONObject to Row and vice versa.
*/
object SequoiadbRowConverter extends JsonSupport
with Serializable {
/**
* Converts an RDD of BSONObject records into an RDD of Spark SQL Rows.
* @param schema RDD native schema
* @param rdd Current native RDD
* @return A brand new RDD of Spark SQL Row type.
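*
* A minimal usage sketch; the names `schema` and `bsonRDD` are hypothetical
* and assume an existing `RDD[BSONObject]`:
* {{{
* val rows: RDD[Row] = SequoiadbRowConverter.asRow(schema, bsonRDD)
* }}}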
*/
def asRow(schema: StructType, rdd: RDD[BSONObject]): RDD[Row] = {
rdd.map { record =>
recordAsRow(dbObjectToMap(record), schema)
}
}
/**
* Given a schema, it converts a JSON object (as map) into a Row
* @param json DBObject map
* @param schema Schema
* @return The converted row
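*
* A minimal sketch with a hypothetical one-field schema:
* {{{
* val schema = StructType(Seq(StructField("name", StringType)))
* val row = SequoiadbRowConverter.recordAsRow(Map("name" -> "foo"), schema)
* // expected: row.getString(0) == "foo"
* }}}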
*/
def recordAsRow(
json: Map[String, AnyRef],
schema: StructType): Row = {
val values: Seq[Any] = schema.fields.map {
case StructField(name, dataType, _, _) =>
json.get(name).flatMap(v => Option(v)).map(
toSQL(_, dataType)).orNull
}
Row.fromSeq(values)
}
/**
* Given a schema, it converts a Row into a DBObject
* @param row Row to be converted
* @param schema Schema
* @return The converted DBObject
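*
* A minimal sketch (hypothetical field names; Row values must follow schema order):
* {{{
* val schema = StructType(Seq(StructField("id", IntegerType), StructField("name", StringType)))
* val obj = SequoiadbRowConverter.rowAsDBObject(Row(1, "foo"), schema)
* // expected: obj.get("name") == "foo"
* }}}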
*/
def rowAsDBObject(row: Row, schema: StructType): BSONObject = {
val attMap: Map[String, Any] = schema.fields.zipWithIndex.map {
case (att, idx) => (att.name, toDBObject(row(idx), att.dataType))
}.toMap
val obj: BSONObject = new BasicBSONObject()
obj.putAll(attMap)
obj
}
/**
* Converts a Row attribute value into a DBObject field.
* @param value Row attribute
* @param dataType Attribute type
* @return The converted value as a DBObject field.
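*
* A small illustration of the widening rules (hypothetical values):
* {{{
* SequoiadbRowConverter.toDBObject(1.toByte, ByteType) // widened to the Int 1
* SequoiadbRowConverter.toDBObject(null, StringType)   // null is preserved
* }}}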
*/
def toDBObject(value: Any, dataType: DataType): Any = {
Option(value).map { v =>
(dataType, v) match {
// Arrays are converted element by element into a BasicBSONList
case (ArrayType(elementType, _), array: ArrayBuffer[Any @unchecked]) =>
val bl: BasicBSONList = new BasicBSONList
array.zipWithIndex.foreach {
case (obj, i) => bl.put(i, toDBObject(obj, elementType))
}
bl
// Nested rows are converted recursively via rowAsDBObject
case (struct: StructType, row: GenericRow) =>
rowAsDBObject(row, struct)
// Decimals are stored as doubles
case (_: DecimalType, _) => v.asInstanceOf[Decimal].toDouble
// BSONTimestamp holds seconds since the epoch plus an increment;
// the millisecond remainder is scaled to a microsecond increment
case (_: TimestampType, _) =>
val time = v.asInstanceOf[Timestamp]
new BSONTimestamp((time.getTime() / 1000).toInt, (time.getTime() % 1000).toInt * 1000)
// BSON has no one- or two-byte integer types, so widen to Int
case (_: ByteType, _) => v.asInstanceOf[Byte].toInt
case (_: ShortType, _) => v.asInstanceOf[Short].toInt
case (_: BinaryType, _) => new Binary(v.asInstanceOf[Array[Byte]])
case _ => v
}
}.orNull
}
/**
* Converts a DBObject attribute value into a Row field.
* @param value DBObject attribute
* @param dataType Attribute type
* @return The converted value as a Row field.
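*
* A minimal sketch, assuming a BasicBSONList that holds boxed integers:
* {{{
* val list = new BasicBSONList
* list.put(0, Int.box(1))
* val seq = SequoiadbRowConverter.toSQL(list, ArrayType(IntegerType))
* }}}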
*/
def toSQL(value: Any, dataType: DataType): Any = {
Option(value).map{value =>
dataType match {
case ArrayType(elementType, _) =>
value.asInstanceOf[BasicBSONList].map(toSQL(_, elementType))
case struct: StructType =>
recordAsRow(dbObjectToMap(value.asInstanceOf[BSONObject]), struct)
case _ =>
// Ensure the value is mapped to the schema-constrained type
enforceCorrectType(value, dataType)
}
}.orNull
}
/**
* It creates a map with dbObject attribute values.
* @param dBObject Object to be split into attribute tuples.
* @return A map with dbObject attributes.
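*
* A minimal sketch with a hypothetical single-field object:
* {{{
* val map = SequoiadbRowConverter.dbObjectToMap(new BasicBSONObject("name", "foo"))
* // expected: map("name") == "foo"
* }}}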
*/
def dbObjectToMap(dBObject: BSONObject): Map[String, AnyRef] = {
val scalaMap = dBObject.toMap.map {
case (k, v) => k.asInstanceOf[String] -> v.asInstanceOf[AnyRef]
}
scalaMap.toMap
}
}