icu.wuhufly.dws.slide_window_runnning13.scala (from the shtd-bd artifact: bigdata source code for shtd)
package icu.wuhufly.dws

import icu.wuhufly.utils.{CreateUtils, WriteUtils}
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SparkSession}

import java.sql.Timestamp
import java.text.SimpleDateFormat
object slide_window_runnning13 {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = CreateUtils.getSpark()
    val sc: SparkContext = spark.sparkContext
    // Needed below for .as[Timestamp] and spark.createDataset
    import spark.implicits._

    spark.sql("use dwd")
    // Step 1: per-day, per-hour total running time in seconds.
    // '运行' is the Chinese label for the "Running" state; casting a timestamp
    // to long yields epoch seconds, so end - start is the duration in seconds.
    // dt keeps a trailing underscore ('yyyy-MM-dd_') so state_start_time can
    // be built later by plain concatenation with the hour.
    spark.sql(
      """
        |select date_format(ChangeStartTime, 'yyyy-MM-dd_') as dt,
        |       date_format(ChangeStartTime, 'HH') as hour,
        |       sum(cast(ChangeEndTime as long) - cast(ChangeStartTime as long)) as duration
        |from (
        |  select to_timestamp(ChangeEndTime, 'yyyy-MM-dd HH:mm:ss') as ChangeEndTime,
        |         to_timestamp(ChangeStartTime, 'yyyy-MM-dd HH:mm:ss') as ChangeStartTime
        |  from fact_change_record
        |  where ChangeRecordState = '运行'
        |    and to_timestamp(ChangeStartTime, 'yyyy-MM-dd HH:mm:ss') >= cast('2021-10-13' as date)
        |) t1
        |group by date_format(ChangeStartTime, 'yyyy-MM-dd_'), date_format(ChangeStartTime, 'HH')
        |""".stripMargin)
      .createOrReplaceTempView("temp")
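    // Shape of temp (values illustrative only, not from the source data):
    //   dt            hour  duration
    //   2021-10-13_   00    5230
    //   2021-10-13_   01    4871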
    // Step 2: enumerate every calendar day between the earliest and latest day
    // seen in temp, so days and hours with no running records still get a slot.
    val sdf = new SimpleDateFormat("yyyy-MM-dd")
    var minTS: Timestamp = spark.sql("select min(to_timestamp(dt, 'yyyy-MM-dd_')) from temp").as[Timestamp].head()
    val maxTS: Timestamp = spark.sql("select max(to_timestamp(dt, 'yyyy-MM-dd_')) from temp").as[Timestamp].head()
    var dateArr: Seq[String] = Seq.empty[String]
    while (minTS.before(maxTS)) {
      dateArr :+= sdf.format(minTS)
      minTS = new Timestamp(minTS.getTime + 1000L * 60 * 60 * 24) // advance one day
    }
    dateArr :+= sdf.format(maxTS) // include the last day itself
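    // The same day list could be built entirely in Spark SQL (2.4+) with
    // sequence(), sketched here for reference only; this job keeps the loop:
    //   select explode(sequence(mn, mx, interval 1 day)) as d
    //   from (select min(to_date(dt, 'yyyy-MM-dd_')) as mn,
    //                max(to_date(dt, 'yyyy-MM-dd_')) as mx
    //         from temp)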
    // Step 3: cross-join the 24 zero-padded hour strings with the day list to
    // get one row per (hour, day), i.e. 24 x number-of-days rows.
    spark.createDataset((0 until 24).map(i => "%02d".format(i)))
      .crossJoin(spark.createDataset(dateArr))
      .toDF("hour", "dt")
      .createOrReplaceTempView("hour_tbl")
    // Step 4: sliding 3-hour window. For each (day, hour h) slot the join picks
    // up the per-hour totals for hours h-2..h of the same day, then aggregates.
    // The having clause drops '2021-10-13_00' and '_01', whose windows would
    // reach before the start of the data.
    val df: DataFrame = spark.sql(
      """
        |select concat(t1.dt, '_', t1.hour) as state_start_time,
        |       sum(t2.duration) as window_sum,
        |       avg(t2.duration) as window_avg
        |from hour_tbl t1
        |join temp t2
        |  on concat(t1.dt, '_') = t2.dt
        | and cast(t1.hour as int) >= cast(t2.hour as int)
        | and cast(t1.hour as int) - 3 < cast(t2.hour as int)
        |group by concat(t1.dt, '_', t1.hour)
        |having concat(t1.dt, '_', t1.hour) >= '2021-10-13_02'
        |""".stripMargin)
    // Step 5: persist the result to MySQL via the project helper.
    WriteUtils.writeToMysql("slide_window_runnning", df)

    sc.stop()
  }
}
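
The CreateUtils and WriteUtils helpers live elsewhere in the shtd-bd artifact and are not shown on this page. The sketch below is an assumption about their shape, not the published implementation: getSpark() presumably builds a Hive-enabled session (the job runs "use dwd" against a warehouse), and writeToMysql presumably wraps Spark's standard JDBC writer. The URL, database, credentials, and driver class below are placeholders.

package icu.wuhufly.utils

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

object CreateUtils {
  // Hive support is required because the job reads dwd.fact_change_record.
  def getSpark(): SparkSession =
    SparkSession.builder()
      .appName("slide_window_runnning13")
      .enableHiveSupport()
      .getOrCreate()
}

object WriteUtils {
  // Appends df to the given MySQL table through the built-in JDBC source.
  def writeToMysql(table: String, df: DataFrame): Unit =
    df.write
      .format("jdbc")
      .mode(SaveMode.Append)
      .option("url", "jdbc:mysql://localhost:3306/shtd_industry") // placeholder
      .option("dbtable", table)
      .option("user", "root")         // placeholder
      .option("password", "password") // placeholder
      .option("driver", "com.mysql.jdbc.Driver")
      .save()
}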