io.prediction.data.storage.jdbc.JDBCPEvents.scala

/** Copyright 2015 TappingStone, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

package io.prediction.data.storage.jdbc

import java.sql.{DriverManager, ResultSet}

import com.github.nscala_time.time.Imports._
import io.prediction.data.storage.{DataMap, Event, PEvents, StorageClientConfig}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.{JdbcRDD, RDD}
import org.apache.spark.sql.{SQLContext, SaveMode}
import org.json4s.JObject
import org.json4s.native.Serialization

/** JDBC implementation of [[PEvents]] */
class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String) extends PEvents {
  @transient private implicit lazy val formats = org.json4s.DefaultFormats
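  /** Read events of app `appId` (and optional channel) as an `RDD[Event]`.
    *
    * Note the doubly wrapped `targetEntityType` / `targetEntityId`
    * parameters: `None` imposes no constraint, `Some(None)` matches rows
    * where the column is null, and `Some(Some(x))` matches rows equal to
    * `x`, mirroring the clause construction below.
    */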
  def find(
    appId: Int,
    channelId: Option[Int] = None,
    startTime: Option[DateTime] = None,
    untilTime: Option[DateTime] = None,
    entityType: Option[String] = None,
    entityId: Option[String] = None,
    eventNames: Option[Seq[String]] = None,
    targetEntityType: Option[Option[String]] = None,
    targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event] = {
    val lower = startTime.map(_.getMillis).getOrElse(0.toLong)
    /** Default the upper bound to now + 1 year (instead of + 100 years)
      * because MySQL's FROM_UNIXTIME(t) returns NULL for timestamps that far
      * in the future.
      */
    val upper = untilTime.map(_.getMillis).getOrElse((DateTime.now + 1.years).getMillis)
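    /** Use one JdbcRDD partition per whole day in the queried range, but no
      * more than the PARTITIONS property allows (default 4).
      */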
    val par = scala.math.min(
      new Duration(upper - lower).getStandardDays,
      config.properties.getOrElse("PARTITIONS", "4").toLong).toInt
    val entityTypeClause = entityType.map(x => s"and entityType = '$x'").getOrElse("")
    val entityIdClause = entityId.map(x => s"and entityId = '$x'").getOrElse("")
    val eventNamesClause =
      eventNames.map("and (" + _.map(y => s"event = '$y'").mkString(" or ") + ")").getOrElse("")
    val targetEntityTypeClause = targetEntityType.map(
      _.map(x => s"and targetEntityType = '$x'"
    ).getOrElse("and targetEntityType is null")).getOrElse("")
    val targetEntityIdClause = targetEntityId.map(
      _.map(x => s"and targetEntityId = '$x'"
    ).getOrElse("and targetEntityId is null")).getOrElse("")
    val q = s"""
      select
        id,
        event,
        entityType,
        entityId,
        targetEntityType,
        targetEntityId,
        properties,
        eventTime,
        eventTimeZone,
        tags,
        prId,
        creationTime,
        creationTimeZone
      from ${JDBCUtils.eventTableName(namespace, appId, channelId)}
      where
        eventTime >= ${JDBCUtils.timestampFunction(client)}(?) and
        eventTime < ${JDBCUtils.timestampFunction(client)}(?)
      $entityTypeClause
      $entityIdClause
      $eventNamesClause
      $targetEntityTypeClause
      $targetEntityIdClause
      """.replace("\n", " ")
    new JdbcRDD(
      sc,
      () => {
        DriverManager.getConnection(
          client,
          config.properties("USERNAME"),
          config.properties("PASSWORD"))
      },
      q,
      lower / 1000,
      upper / 1000,
      par,
      (r: ResultSet) => {
        Event(
          eventId = Option(r.getString("id")),
          event = r.getString("event"),
          entityType = r.getString("entityType"),
          entityId = r.getString("entityId"),
          targetEntityType = Option(r.getString("targetEntityType")),
          targetEntityId = Option(r.getString("targetEntityId")),
          properties = Option(r.getString("properties")).map(x =>
            DataMap(Serialization.read[JObject](x))).getOrElse(DataMap()),
          eventTime = new DateTime(r.getTimestamp("eventTime").getTime,
            DateTimeZone.forID(r.getString("eventTimeZone"))),
          tags = Option(r.getString("tags")).map(x =>
            x.split(",").toList).getOrElse(Nil),
          prId = Option(r.getString("prId")),
          creationTime = new DateTime(r.getTimestamp("creationTime").getTime,
            DateTimeZone.forID(r.getString("creationTimeZone"))))
      }).cache()
  }

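  /** Append `events` to the event table of `appId`/`channelId` through a
    * Spark DataFrame. Optional fields are written as SQL NULL and
    * `properties` is serialized to a JSON string.
    */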
  def write(events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
    val sqlContext = new SQLContext(sc)

    import sqlContext.implicits._

    val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)

    val eventTableColumns = Seq[String](
        "id"
      , "event"
      , "entityType"
      , "entityId"
      , "targetEntityType"
      , "targetEntityId"
      , "properties"
      , "eventTime"
      , "eventTimeZone"
      , "tags"
      , "prId"
      , "creationTime"
      , "creationTimeZone")

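    // Flatten each Event into a 13-element tuple matching eventTableColumns.
    // Absent optional fields are passed as null (or left as Option values),
    // both of which Spark writes as SQL NULL.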
    val eventDF = events.map { event =>
      (event.eventId.getOrElse(JDBCUtils.generateId)
        , event.event
        , event.entityType
        , event.entityId
        , event.targetEntityType.orNull
        , event.targetEntityId.orNull
        , if (!event.properties.isEmpty) Serialization.write(event.properties.toJObject) else null
        , new java.sql.Timestamp(event.eventTime.getMillis)
        , event.eventTime.getZone.getID
        , if (event.tags.nonEmpty) Some(event.tags.mkString(",")) else null
        , event.prId
        , new java.sql.Timestamp(event.creationTime.getMillis)
        , event.creationTime.getZone.getID)
    }.toDF(eventTableColumns:_*)

    // DataFrameWriter (df.write.jdbc) requires Spark 1.4.0 or higher
    val prop = new java.util.Properties
    prop.setProperty("user", config.properties("USERNAME"))
    prop.setProperty("password", config.properties("PASSWORD"))
    eventDF.write.mode(SaveMode.Append).jdbc(client, tableName, prop)
  }
}
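
// ---------------------------------------------------------------------------
// Usage sketch (not part of the original PredictionIO source). A minimal,
// hedged example of driving JDBCPEvents from a Spark driver program. The JDBC
// URL, credentials, app ids, and event name below are illustrative
// placeholders, and the StorageClientConfig construction assumes its
// `properties` map can be supplied directly, as this file's reads of
// `config.properties` suggest.
// ---------------------------------------------------------------------------
object JDBCPEventsUsageSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[2]", "JDBCPEventsUsageSketch")

    val config = StorageClientConfig(properties = Map(
      "USERNAME" -> "pio",   // hypothetical database user
      "PASSWORD" -> "pio",   // hypothetical database password
      "PARTITIONS" -> "4"))  // cap on JdbcRDD partitions used by find()

    val events = new JDBCPEvents(
      client = "jdbc:postgresql://localhost/pio", // hypothetical JDBC URL
      config = config,
      namespace = "pio_event")

    // Read the last 30 days of "view" events for app 1 into an RDD[Event].
    val recent: RDD[Event] = events.find(
      appId = 1,
      startTime = Some(DateTime.now - 30.days),
      eventNames = Some(Seq("view")))(sc)
    println(s"read ${recent.count()} events")

    // Append the same events to the (pre-existing) event table of app 2.
    events.write(recent, appId = 2, channelId = None)(sc)

    sc.stop()
  }
}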



