spark.jobserver.io.JobSqlDAO.scala
package spark.jobserver.io
import java.io.{BufferedOutputStream, File, FileOutputStream}
import java.sql.Timestamp
import javax.sql.DataSource

import scala.reflect.runtime.universe
import scala.slick.driver.JdbcProfile
import scala.slick.jdbc.meta.MTable

import com.typesafe.config.{Config, ConfigFactory, ConfigRenderOptions}
import org.apache.commons.dbcp.BasicDataSource
import org.joda.time.DateTime
import org.slf4j.LoggerFactory
class JobSqlDAO(config: Config) extends JobDAO {
val slickDriverClass = config.getString("spark.jobserver.sqldao.slick-driver")
val jdbcDriverClass = config.getString("spark.jobserver.sqldao.jdbc-driver")
val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader)
val profileModule = runtimeMirror.staticModule(slickDriverClass)
val profile = runtimeMirror.reflectModule(profileModule).instance.asInstanceOf[JdbcProfile]
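  // The Slick driver object named in the config is loaded via runtime reflection.
  // Illustrative values (examples only, not necessarily the project defaults), e.g. for H2:
  //   spark.jobserver.sqldao.slick-driver = scala.slick.driver.H2Driver
  //   spark.jobserver.sqldao.jdbc-driver  = org.h2.Driver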
import profile.simple._
private val logger = LoggerFactory.getLogger(getClass)
// NOTE: below is only needed for H2 drivers
private val rootDir = config.getString("spark.jobserver.sqldao.rootdir")
private val rootDirFile = new File(rootDir)
logger.info("rootDir is " + rootDirFile.getAbsolutePath)
// Definition of the tables
//scalastyle:off
class Jars(tag: Tag) extends Table[(Int, String, Timestamp, Array[Byte])](tag, "JARS") {
def jarId = column[Int]("JAR_ID", O.PrimaryKey, O.AutoInc)
def appName = column[String]("APP_NAME")
def uploadTime = column[Timestamp]("UPLOAD_TIME")
def jar = column[Array[Byte]]("JAR")
// Every table needs a * projection with the same type as the table's type parameter
def * = (jarId, appName, uploadTime, jar)
}
val jars = TableQuery[Jars]
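  // For reference, jars.ddl.create emits DDL roughly like the following
  // (exact column types vary by database dialect):
  //   CREATE TABLE "JARS" (
  //     "JAR_ID"      INTEGER GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
  //     "APP_NAME"    VARCHAR NOT NULL,
  //     "UPLOAD_TIME" TIMESTAMP NOT NULL,
  //     "JAR"         BLOB NOT NULL
  //   )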
  // We deliberately avoid declaring 'jarId' as a foreign key so we don't have to
  // deal with referential integrity constraint violations.
class Jobs(tag: Tag) extends Table[(String, String, Int, String, Timestamp,
Option[Timestamp], Option[String])] (tag, "JOBS") {
def jobId = column[String]("JOB_ID", O.PrimaryKey)
def contextName = column[String]("CONTEXT_NAME")
    def jarId = column[Int]("JAR_ID") // logical reference to JARS (not declared as an FK, see above)
def classPath = column[String]("CLASSPATH")
def startTime = column[Timestamp]("START_TIME")
def endTime = column[Option[Timestamp]]("END_TIME")
def error = column[Option[String]]("ERROR", O.DBType("varchar"))
def * = (jobId, contextName, jarId, classPath, startTime, endTime, error)
}
val jobs = TableQuery[Jobs]
class Configs(tag: Tag) extends Table[(String, String)](tag, "CONFIGS") {
def jobId = column[String]("JOB_ID", O.PrimaryKey)
def jobConfig = column[String]("JOB_CONFIG", O.DBType("varchar"))
def * = (jobId, jobConfig)
}
//scalastyle:on
val configs = TableQuery[Configs]
// DB initialization
val jdbcUrl = config.getString("spark.jobserver.sqldao.jdbc.url")
val jdbcUser = config.getString("spark.jobserver.sqldao.jdbc.user")
val jdbcPassword = config.getString("spark.jobserver.sqldao.jdbc.password")
val dbcpMaxActive = config.getInt("spark.jobserver.sqldao.dbcp.maxactive")
val dbcpMaxIdle = config.getInt("spark.jobserver.sqldao.dbcp.maxidle")
val dbcpInitialSize = config.getInt("spark.jobserver.sqldao.dbcp.initialsize")
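  // An illustrative configuration block covering the keys read above
  // (example values only, not defaults):
  //   spark.jobserver.sqldao {
  //     rootdir = /tmp/spark-jobserver/sqldao/data
  //     jdbc {
  //       url = "jdbc:h2:file:/tmp/spark-jobserver/sqldao/data/h2-db"
  //       user = ""
  //       password = ""
  //     }
  //     dbcp {
  //       maxactive = 20
  //       maxidle = 10
  //       initialsize = 10
  //     }
  //   }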
val dataSource: DataSource = {
val ds = new BasicDataSource
ds.setDriverClassName(jdbcDriverClass)
ds.setUsername(jdbcUser)
ds.setPassword(jdbcPassword)
ds.setMaxActive(dbcpMaxActive)
ds.setMaxIdle(dbcpMaxIdle)
ds.setInitialSize(dbcpInitialSize)
ds.setUrl(jdbcUrl)
ds
}
val db = Database.forDataSource(dataSource)
// Server initialization
init()
private def init() {
// Create the data directory if it doesn't exist
if (!rootDirFile.exists()) {
if (!rootDirFile.mkdirs()) {
throw new RuntimeException("Could not create directory " + rootDir)
}
}
// Create the tables if they don't exist
db withSession {
implicit session =>
        if (isAllTablesMissing()) {
          // None of the tables exist: this is the first time the Job Server runs,
          // so create all of them.
          logger.info("Creating JARS, CONFIGS and JOBS tables ...")
          jars.ddl.create
          configs.ddl.create
          jobs.ddl.create
        } else if (isAnyTableMissing()) {
          // Only some of the tables exist, which means the metadata store is corrupted.
          // Exit the Job Server now.
          throw new RuntimeException(
            "Some tables in the metadata store are missing. Please recover the store first.")
        }
        // Otherwise all tables already exist and no initialization is needed.
}
}
  // Check whether a single table exists
  private def tableExists(tableName: String)(implicit session: Session): Boolean =
    MTable.getTables(tableName).list.nonEmpty

  // Check whether none of the tables exist
  private def isAllTablesMissing()(implicit session: Session): Boolean =
    !tableExists("JARS") && !tableExists("CONFIGS") && !tableExists("JOBS")

  // Check whether at least one of the tables is missing
  private def isAnyTableMissing()(implicit session: Session): Boolean =
    !tableExists("JARS") || !tableExists("CONFIGS") || !tableExists("JOBS")
  override def saveJar(appName: String, uploadTime: DateTime, jarBytes: Array[Byte]) {
    // The order is important: cache the jar file on the local file system first,
    // then record it in the database.
    cacheJar(appName, uploadTime, jarBytes)
    insertJarInfo(JarInfo(appName, uploadTime), jarBytes)
  }
override def getApps: Map[String, DateTime] = {
db withSession {
implicit session =>
        // Equivalent SQL: "SELECT APP_NAME, MAX(UPLOAD_TIME) FROM JARS GROUP BY APP_NAME";
        // max(uploadTime) is the latest upload time of each app's jar.
val query = jars.groupBy { _.appName }.map {
case (appName, jar) => (appName -> jar.map(_.uploadTime).max.get)
}
query.list.map {
case (appName: String, timestamp: Timestamp) =>
(appName, convertDateSqlToJoda(timestamp))
}.toMap
}
}
// Insert JarInfo and its jar into db and return the primary key associated with that row
private def insertJarInfo(jarInfo: JarInfo, jarBytes: Array[Byte]): Int = {
    // Must provide a value that will be ignored because the JARS jarId column
    // is tagged with O.AutoInc
val IGNORED_VAL = -1
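    // 'jars returning jars.map(_.jarId)' makes the insert below yield the
    // database-generated primary key instead of the affected row count.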
db withSession {
implicit session =>
(jars returning jars.map(_.jarId)) +=
(IGNORED_VAL, jarInfo.appName, convertDateJodaToSql(jarInfo.uploadTime), jarBytes)
}
}
override def retrieveJarFile(appName: String, uploadTime: DateTime): String = {
val jarFile = new File(rootDir, createJarName(appName, uploadTime) + ".jar")
if (!jarFile.exists()) {
fetchAndCacheJarFile(appName, uploadTime)
}
jarFile.getAbsolutePath
}
  // Fetch the jar file from the database and cache it on the local file system.
private def fetchAndCacheJarFile(appName: String, uploadTime: DateTime) {
val jarBytes = fetchJar(appName, uploadTime)
cacheJar(appName, uploadTime, jarBytes)
}
// Fetch the jar from the database
private def fetchJar(appName: String, uploadTime: DateTime): Array[Byte] = {
db withSession {
implicit session =>
val dateTime = convertDateJodaToSql(uploadTime)
val query = jars.filter { jar =>
jar.appName === appName && jar.uploadTime === dateTime
}.map( _.jar )
      // Fail with a clear error if no matching jar exists in the database
      query.list.headOption.getOrElse {
        throw new NoSuchElementException("No jar found for " + appName + " at " + uploadTime)
      }
}
}
// Fetch the primary key for a particular jar from the database
// TODO: Might not be needed but was very quick to implement
private def fetchJarId(appName: String, uploadTime: DateTime): Int = {
db withSession {
implicit session =>
queryJarId(appName, uploadTime)
}
}
private def queryJarId(appName: String, uploadTime: DateTime)(implicit session: Session): Int = {
val dateTime = convertDateJodaToSql(uploadTime)
val query = jars.filter { jar =>
jar.appName === appName && jar.uploadTime === dateTime
}.map( _.jarId )
query.list.head
}
  // Cache the jar file on the local file system.
private def cacheJar(appName: String, uploadTime: DateTime, jarBytes: Array[Byte]) {
val outFile = new File(rootDir, createJarName(appName, uploadTime) + ".jar")
val bos = new BufferedOutputStream(new FileOutputStream(outFile))
try {
logger.debug("Writing {} bytes to file {}", jarBytes.size, outFile.getPath)
bos.write(jarBytes)
bos.flush()
} finally {
bos.close()
}
}
private def createJarName(appName: String, uploadTime: DateTime): String = appName + "-" + uploadTime
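  // For example, createJarName("wordcount", uploadTime) yields something like
  // "wordcount-2015-01-02T03:04:05.678Z" (Joda DateTime.toString is ISO-8601),
  // so the cached file would be "wordcount-2015-01-02T03:04:05.678Z.jar".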
// Convert from joda DateTime to java.sql.Timestamp
private def convertDateJodaToSql(dateTime: DateTime): Timestamp =
new Timestamp(dateTime.getMillis())
// Convert from java.sql.Timestamp to joda DateTime
private def convertDateSqlToJoda(timestamp: Timestamp): DateTime =
new DateTime(timestamp.getTime())
override def getJobConfigs: Map[String, Config] = {
db withSession {
      implicit session =>
configs.list.map { case (jobId, jobConfig) =>
(jobId -> ConfigFactory.parseString(jobConfig))
}.toMap
}
}
override def saveJobConfig(jobId: String, jobConfig: Config) {
db withSession {
      implicit session =>
configs += (jobId, jobConfig.root().render(ConfigRenderOptions.concise()))
}
}
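  // ConfigRenderOptions.concise() renders the config as single-line JSON with no
  // comments, e.g. {"input":{"data":"a b c"}}, which getJobConfigs above parses
  // back with ConfigFactory.parseString.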
override def saveJobInfo(jobInfo: JobInfo) {
db withSession {
      implicit session =>
        // First, query the JARS table for the jarId
        val jarId = queryJarId(jobInfo.jarInfo.appName, jobInfo.jarInfo.uploadTime)
        // Extract the JobInfo members and convert them to the appropriate SQL types
        val JobInfo(jobId, contextName, _, classPath, startTime, endTime, error) = jobInfo
        val (start, endOpt, errOpt) = (convertDateJodaToSql(startTime),
          endTime.map(convertDateJodaToSql(_)),
          error.map(_.getMessage))
        // When a job runs asynchronously, the same row is written twice with different
        // endTime and error values. If the row does not exist yet, insert it as new;
        // otherwise, update its endTime and error columns.
val updateQuery = jobs.filter(_.jobId === jobId).map(job => (job.endTime, job.error))
updateQuery.list.size match {
case 0 => jobs += (jobId, contextName, jarId, classPath, start, endOpt, errOpt)
case _ => updateQuery.update(endOpt, errOpt)
}
}
}
override def getJobInfos(limit: Int): Seq[JobInfo] = {
db withSession {
      implicit session =>
        // Join the JARS and JOBS tables, selecting only the columns we need
val joinQuery = for {
jar <- jars
j <- jobs if j.jarId === jar.jarId
} yield
(j.jobId, j.contextName, jar.appName, jar.uploadTime, j.classPath, j.startTime, j.endTime, j.error)
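        // Sort by start time, newest first, then keep only the most recent 'limit' rows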
val sortQuery = joinQuery.sortBy(_._6.desc)
val limitQuery = sortQuery.take(limit)
        // Transform each row of the result into a JobInfo
limitQuery.list.map {
case (id, context, app, upload, classpath, start, end, err) =>
JobInfo(id,
context,
JarInfo(app, convertDateSqlToJoda(upload)),
classpath,
convertDateSqlToJoda(start),
end.map(convertDateSqlToJoda(_)),
err.map(new Throwable(_)))
}.toSeq
}
}
override def getJobInfo(jobId: String): Option[JobInfo] = {
db withSession {
      implicit session =>
        // Join the JARS and JOBS tables, selecting only the columns we need
val joinQuery = for {
jar <- jars
j <- jobs if j.jarId === jar.jarId && j.jobId === jobId
} yield
(j.jobId, j.contextName, jar.appName, jar.uploadTime, j.classPath, j.startTime,
j.endTime, j.error)
joinQuery.list.map { case (id, context, app, upload, classpath, start, end, err) =>
JobInfo(id,
context,
JarInfo(app, convertDateSqlToJoda(upload)),
classpath,
convertDateSqlToJoda(start),
end.map(convertDateSqlToJoda(_)),
err.map(new Throwable(_)))
}.headOption
}
}
}
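// A minimal usage sketch (hypothetical, assuming an application.conf on the
// classpath that supplies the spark.jobserver.sqldao.* settings shown above):
//
//   val dao = new JobSqlDAO(ConfigFactory.load())
//   dao.saveJar("wordcount", DateTime.now(), jarBytes)
//   val apps = dao.getApps  // Map[String, DateTime] of each app's latest upload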