All downloads are free. The search and download features use the official Maven repository.

icu.wuhufly.dws.machine_data_total_time_top311.scala Maven / Gradle / Ivy

The newest version!
package icu.wuhufly.dws

import icu.wuhufly.utils.{CreateUtils, WriteUtils}
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Spark job that derives, for every `machine_record_date`, the machines holding
 * ranks 1, 2 and 3 by `total_time` and writes them out as one wide row per day.
 *
 * Pipeline:
 *  1. Switch to the `dws` database.
 *  2. Rank the rows of `machine_data_total_time` per `machine_record_date`,
 *     ordered by `total_time` descending, using `dense_rank()`; register the
 *     result as the temp view `temp`.
 *  3. Join the rank-1, rank-2 and rank-3 rows of the same day into a single row
 *     (`day`, `first_id`, `second_id`, `tertiary_id`, `first_time`,
 *     `second_time`, `tertiary_time`).
 *  4. Pass the result to `WriteUtils.writeToCK` under the name
 *     `machine_data_total_time_top3`.
 *
 * NOTE(review): `dense_rank()` gives tied rows the same rank, so a day with ties
 * produces several rows for that rank and therefore several output rows for the
 * day. Days with fewer than three distinct ranks are dropped by the inner joins.
 * Both behaviours are kept as-is — confirm they match the requirement.
 */
object machine_data_total_time_top311 {

  /**
   * Job entry point.
   *
   * @param args command-line arguments; not used by this job
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = CreateUtils.getSpark()
    val sc: SparkContext = spark.sparkContext

    // try/finally guarantees the Spark context is stopped even when a query or
    // the final write throws.
    try {
      spark.sql("use dws")

      // Per-day ranking of machines by total_time, highest first.
      spark.sql(
        """
          |select *, dense_rank() over(partition by machine_record_date order by total_time desc) as rn
          |  from machine_data_total_time
          |""".stripMargin)
        .createOrReplaceTempView("temp")

      // Each join carries its own ON clause so that no join is written as an
      // unconditioned (cartesian) join; the join keys are the same as before.
      val df: DataFrame = spark.sql(
        """
          |select t1.machine_record_date as day,
          |  t1.machine_id as first_id,
          |  t2.machine_id as second_id,
          |  t3.machine_id as tertiary_id,
          |  t1.total_time as first_time,
          |  t2.total_time as second_time,
          |  t3.total_time as tertiary_time
          |    from (select * from temp where rn = 1) t1
          |    join (select * from temp where rn = 2) t2
          |      on t1.machine_record_date = t2.machine_record_date
          |    join (select * from temp where rn = 3) t3
          |      on t1.machine_record_date = t3.machine_record_date
          |""".stripMargin)

      // NOTE(review): the third argument ("day") is interpreted by WriteUtils,
      // which is not visible here — presumably a key/ordering column; confirm.
      WriteUtils.writeToCK(
        "machine_data_total_time_top3", df, "day"
      )
    } finally {
      sc.stop()
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy