spark.jobserver.HiveTestJob.scala
package spark.jobserver
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.spark._
import org.apache.spark.sql.hive.HiveContext
/**
 * A test job that accepts a HiveContext, as opposed to the regular SparkContext.
 * Initializes some dummy data into a table, reads it back out, and returns a count.
 * (Will create a Hive metastore at job-server/metastore_db if Hive isn't configured.)
 */
object HiveLoaderJob extends SparkHiveJob {
  // The following data is stored at ./hive_test_job_addresses.txt
  // val addresses = Seq(
  //   Address("Bob", "Charles", "101 A St.", "San Jose"),
  //   Address("Sandy", "Charles", "10200 Ranch Rd.", "Purple City"),
  //   Address("Randy", "Charles", "101 A St.", "San Jose")
  // )

  val tableCreate = "CREATE TABLE `default`.`test_addresses`"
  val tableArgs = "(`firstName` String, `lastName` String, `address` String, `city` String)"
  val tableRowFormat = "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'"
  val tableColFormat = "COLLECTION ITEMS TERMINATED BY '\002'"
  val tableMapFormat = "MAP KEYS TERMINATED BY '\003' STORED"
  val tableAs = "AS TextFile"

  // Will fail with a 'SemanticException: Invalid path' if this file is not there
  val loadPath = "'test/spark.jobserver/hive_test_job_addresses.txt'"

  def validate(hive: HiveContext, config: Config): SparkJobValidation = SparkJobValid
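
  // A stricter validate (a sketch, not in the original source; SparkJobInvalid is
  // part of the same job-server API) could reject the job up front instead of
  // letting Hive's LOAD DATA fail with the SemanticException noted above:
  //   def validate(hive: HiveContext, config: Config): SparkJobValidation =
  //     if (new java.io.File("test/spark.jobserver/hive_test_job_addresses.txt").exists()) SparkJobValid
  //     else SparkJobInvalid("hive_test_job_addresses.txt not found")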

  def runJob(hive: HiveContext, config: Config): Any = {
    hive.sql("DROP TABLE if exists `default`.`test_addresses`")
    hive.sql(s"$tableCreate $tableArgs $tableRowFormat $tableColFormat $tableMapFormat $tableAs")
    hive.sql(s"LOAD DATA LOCAL INPATH $loadPath OVERWRITE INTO TABLE `default`.`test_addresses`")
    val addrRdd = hive.sql("SELECT * FROM `default`.`test_addresses`")
    addrRdd.count()
  }
}
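
/*
 * Example (a minimal sketch, not part of the original job-server sources):
 * driving HiveLoaderJob directly against a local HiveContext, e.g. from a test
 * harness, instead of submitting it through the job server. The object name,
 * master URL, and app name below are illustrative assumptions.
 */
object HiveLoaderJobLocalExample {
  def main(args: Array[String]): Unit = {
    // Local Spark context; HiveContext falls back to a Derby metastore if Hive isn't configured
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("HiveLoaderJobLocalExample"))
    val hive = new HiveContext(sc)
    // An empty config suffices: HiveLoaderJob reads no config keys
    val count = HiveLoaderJob.runJob(hive, ConfigFactory.empty())
    println(s"Rows loaded into default.test_addresses: $count")
    sc.stop()
  }
}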
/**
 * This job simply runs the Hive SQL supplied under the "sql" config key
 * and returns the collected result rows.
 */
object HiveTestJob extends SparkHiveJob {
  def validate(hive: HiveContext, config: Config): SparkJobValidation = SparkJobValid

  def runJob(hive: HiveContext, config: Config): Any = {
    hive.sql(config.getString("sql")).collect()
  }
}
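
/*
 * Example (a sketch, not part of the original job-server sources): running
 * HiveTestJob with the single "sql" config key it expects. The query assumes
 * the test_addresses table created by HiveLoaderJob above; names are
 * illustrative assumptions.
 */
object HiveTestJobLocalExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("HiveTestJobLocalExample"))
    val hive = new HiveContext(sc)
    // Typesafe Config in HOCON form, exactly what the job server would pass through
    val config = ConfigFactory.parseString(
      """sql = "SELECT city, COUNT(*) AS n FROM `default`.`test_addresses` GROUP BY city" """)
    // runJob returns the collected rows (an Array[Row]) typed as Any
    val rows = HiveTestJob.runJob(hive, config).asInstanceOf[Array[org.apache.spark.sql.Row]]
    rows.foreach(println)
    sc.stop()
  }
}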