icu.wuhufly.dws.slide_window_runnning13.scala

package icu.wuhufly.dws

import icu.wuhufly.utils.{CreateUtils, WriteUtils}
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SparkSession}

import java.sql.Timestamp
import java.text.SimpleDateFormat

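/**
 * For each (day, hour) slot, computes the 3-hour sliding-window sum and
 * average of equipment running time (in seconds) and writes the result to
 * the MySQL table slide_window_runnning.
 */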
object slide_window_runnning13 {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = CreateUtils.getSpark()
    val sc: SparkContext = spark.sparkContext
    import spark.implicits._

    spark.sql("use dwd")
    // Step 1: total running time per hour. Casting the timestamps to long
    // yields Unix seconds, so duration is in seconds; an interval is credited
    // entirely to the hour in which it started. The trailing underscore in dt
    // matches the final 'yyyy-MM-dd_HH' key format.
    spark.sql(
      """
        |select date_format(ChangeStartTime, 'yyyy-MM-dd_') as dt,
        |  date_format(ChangeStartTime, 'HH') as hour,
        |  sum(cast(ChangeEndTime as long) - cast(ChangeStartTime as long)) as duration
        |from (
        |  select to_timestamp(ChangeEndTime, 'yyyy-MM-dd HH:mm:ss') as ChangeEndTime,
        |    to_timestamp(ChangeStartTime, 'yyyy-MM-dd HH:mm:ss') as ChangeStartTime
        |  from fact_change_record
        |  where ChangeRecordState = '运行'  -- '运行' means "Running"
        |    and to_timestamp(ChangeStartTime, 'yyyy-MM-dd HH:mm:ss') >= cast('2021-10-13' as date)
        |) t1
        |group by date_format(ChangeStartTime, 'yyyy-MM-dd_'), date_format(ChangeStartTime, 'HH')
        |""".stripMargin)
      .createOrReplaceTempView("temp")

    // Step 2: enumerate every calendar day from the earliest to the latest
    // date present in temp, inclusive, so no day in between is skipped.
    val sdf = new SimpleDateFormat("yyyy-MM-dd")
    var minTS: Timestamp = spark.sql("select min(to_timestamp(dt, 'yyyy-MM-dd_')) from temp").as[Timestamp].head()
    val maxTS: Timestamp = spark.sql("select max(to_timestamp(dt, 'yyyy-MM-dd_')) from temp").as[Timestamp].head()

    var dateArr: Seq[String] = collection.immutable.Seq[String]()
    while (minTS.before(maxTS)) {
      dateArr :+= sdf.format(minTS)
      minTS = new Timestamp(minTS.getTime + 1000L * 60 * 60 * 24) // advance one day
    }
    dateArr :+= sdf.format(maxTS) // include the last day

    // Step 3: cross join the 24 zero-padded hours with the day list to build
    // one anchor row per (hour, day) slot for the sliding windows.
    spark.createDataset((0 until 24).map(i => "%02d".format(i)))
      .crossJoin(spark.createDataset(dateArr))
      .toDF("hour", "dt")
      .createOrReplaceTempView("hour_tbl")

    // Step 4: 3-hour sliding window. For each (day, hour) slot, aggregate the
    // hourly totals whose hour lies in (hour - 3, hour], i.e. hours hour-2
    // through hour of the same day; windows never cross midnight because the
    // join also matches on the day. With the inner join, window_avg divides
    // by the number of hours that actually have data, not by a fixed 3. The
    // HAVING clause keeps slots from 2021-10-13_02 onward, where a full
    // 3-hour window is available.
    val df: DataFrame = spark.sql(
      """
        |select concat(t1.dt, '_', t1.hour) as state_start_time,
        |  sum(t2.duration) as window_sum,
        |  avg(t2.duration) as window_avg
        |from hour_tbl t1
        |join temp t2
        |  on concat(t1.dt, '_') = t2.dt
        |    and cast(t1.hour as int) >= cast(t2.hour as int)
        |    and cast(t1.hour as int) - 3 < cast(t2.hour as int)
        |group by concat(t1.dt, '_', t1.hour)
        |having concat(t1.dt, '_', t1.hour) >= '2021-10-13_02'
        |""".stripMargin)

    // Write the windowed metrics to MySQL.
    WriteUtils.writeToMysql("slide_window_runnning", df)

    sc.stop()
  }
}
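
The helper objects CreateUtils and WriteUtils are imported from icu.wuhufly.utils but are not part of this listing. Below is a minimal sketch of what they plausibly look like, assuming a Hive-backed session and a MySQL JDBC sink; the application name, JDBC URL, database name, credentials, and save mode are all assumptions, not the project's actual values.

package icu.wuhufly.utils

import java.util.Properties

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

object CreateUtils {
  // Assumed: a shared Hive-enabled session. The real project may configure
  // the master, warehouse location, and other options differently.
  def getSpark(): SparkSession = {
    SparkSession.builder()
      .appName("slide_window_runnning13")
      .enableHiveSupport()
      .getOrCreate()
  }
}

object WriteUtils {
  // Assumed: URL, database, credentials, and SaveMode are placeholders only.
  def writeToMysql(table: String, df: DataFrame): Unit = {
    val props = new Properties()
    props.setProperty("user", "root")
    props.setProperty("password", "password")
    props.setProperty("driver", "com.mysql.cj.jdbc.Driver")
    df.write
      .mode(SaveMode.Append)
      .jdbc("jdbc:mysql://localhost:3306/shtd_industry", table, props)
  }
}

As a design note, the hour_tbl cross join plus self-join is only one way to express the window: the same result could be computed directly on temp with a Spark SQL window function such as sum(duration) over (partition by dt order by cast(hour as int) range between 2 preceding and current row), which handles missing hours without an auxiliary slot table.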