All downloads are free. Search and download functionality uses the official Maven repository.

icu.wuhufly.dws.machine_produce_timetop205.scala Maven / Gradle / Ivy

The newest version!
package icu.wuhufly.dws

import icu.wuhufly.utils.{CreateUtils, WriteUtils}
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Spark batch job that derives, for every `produce_machine_id` in
 * `dws.machine_produce_per_avgtime`, the highest and second-highest
 * `producetime` values and hands the result to `WriteUtils.writeToCK`
 * under the name `machine_produce_timetop2`.
 *
 * Output columns: `machine_id`, `first_time` (rank 1), `second_time` (rank 2).
 *
 * Notes on the query semantics:
 *  - The two ranks are combined with an inner join, so a machine that has
 *    no rank-2 row (only one distinct `producetime`) produces no output row.
 *  - NOTE(review): `dense_rank` assigns the same rank to tied `producetime`
 *    values, so a machine with ties at rank 1 or rank 2 yields several
 *    (duplicated) output rows. If exactly one row per machine is required,
 *    de-duplicate `(produce_machine_id, producetime)` before ranking or
 *    switch to `row_number` -- confirm the intended tie handling first.
 */
object machine_produce_timetop205 {

  /**
   * Job entry point.
   *
   * @param args command-line arguments; not used by this job
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = CreateUtils.getSpark()

    // Always release the Spark session, even when a query or the write fails;
    // previously a failure skipped the stop call and left the context running.
    try {
      spark.sql("use dws")

      // Rank every production time per machine, longest first, and expose the
      // ranked rows as a temporary view for the self-join below.
      spark.sql(
        """
          |select produce_machine_id, producetime,
          |    dense_rank() over(partition by produce_machine_id order by producetime desc) as rn
          |  from machine_produce_per_avgtime
          |""".stripMargin)
        .createOrReplaceTempView("temp")

      // Pair each machine's rank-1 time with its rank-2 time.
      val df: DataFrame = spark.sql(
        """
          |select t1.produce_machine_id as machine_id, t1.producetime as first_time,
          |  t2.producetime as second_time
          |  from (select * from temp where rn = 1) t1
          |  join (select * from temp where rn = 2) t2
          |    on t1.produce_machine_id = t2.produce_machine_id
          |""".stripMargin)

      // Presumably writes to a ClickHouse table keyed/ordered by `machine_id`;
      // the helper is defined elsewhere -- verify against WriteUtils.
      WriteUtils.writeToCK(
        "machine_produce_timetop2", df, "machine_id"
      )
    } finally {
      // SparkSession.stop() also stops the underlying SparkContext.
      spark.stop()
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy