// com.intel.analytics.bigdl.ppml.examples.tpch.Q08.scala Maven / Gradle / Ivy
// The newest version!
// scalastyle:off
/*
* This file is copied from:
* https://github.com/ssavvides/tpch-spark/blob/master/src/main/scala/Q08.scala
*
* Copyright (c) 2015 Savvas Savvides, [email protected], [email protected]
*
* Licensed under the MIT License:
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// scalastyle:on
package com.intel.analytics.bigdl.ppml.examples.tpch
import org.apache.spark.sql.DataFrame
import org.apache.spark.SparkContext
import org.apache.spark.sql.functions.sum
import org.apache.spark.sql.functions.udf
import com.intel.analytics.bigdl.ppml.PPMLContext
/**
 * TPC-H Query 8
 * Savvas Savvides
 *
 * For every order year, reports the sum of the discounted line-item price
 * ("volume") whose supplier nation is "BRAZIL", divided by the sum of all
 * volume. Only orders dated 1995-01-01 through 1996-12-31, placed by customers
 * whose nation lies in region "AMERICA", and line items for parts of type
 * "ECONOMY ANODIZED STEEL" are considered.
 */
class Q08 extends TpchQuery {

  /**
   * Builds the (lazy) DataFrame for the query; nothing is executed here.
   *
   * @param sc             PPML context; only its Spark session's SQLContext is used
   * @param schemaProvider source of the table DataFrames referenced below
   * @return one row per `o_year`, sorted by `o_year`, carrying the ratio
   *         `sum(case_volume) / sum(volume)`
   */
  override def execute(sc: PPMLContext, schemaProvider: TpchSchemaProvider): DataFrame = {
    // The SQLContext implicits bring the $"column" interpolator into scope.
    val sqlContext = sc.getSparkSession.sqlContext
    import sqlContext.implicits._
    // NOTE(review): region, nation, customer, order, lineitem, part and supplier
    // are presumably members of schemaProvider exposed by this import.
    import schemaProvider._

    // First four characters of the date string (assumes a "yyyy-..." layout).
    val yearOf = udf { (date: String) => date.substring(0, 4) }
    // Extended price after applying the discount fraction.
    val discounted = udf { (price: Double, discount: Double) => price * (1 - discount) }
    // Passes the volume through for BRAZIL and contributes zero for any other nation.
    val brazilOnly = udf { (nationName: String, volume: Double) =>
      if (nationName == "BRAZIL") volume else 0
    }

    // Restricted base tables.
    val americaRegion = region.filter($"r_name" === "AMERICA")
    val ordersInWindow = order.filter(
      $"o_orderdate" >= "1995-01-01" && $"o_orderdate" <= "1996-12-31")
    val steelParts = part.filter($"p_type" === "ECONOMY ANODIZED STEEL")

    // Every supplier paired with its nation row; this is where n_name comes from later.
    val supplierNations = nation.join(supplier, $"n_nationkey" === supplier("s_nationkey"))

    // Line items reduced to their keys plus the computed volume.
    val pricedItems = lineitem.select(
      $"l_partkey",
      $"l_suppkey",
      $"l_orderkey",
      discounted($"l_extendedprice", $"l_discount").as("volume"))

    // Keep only items for the selected part type and attach the supplier's nation.
    val suppliedItems = pricedItems
      .join(steelParts, $"l_partkey" === steelParts("p_partkey"))
      .join(supplierNations, $"l_suppkey" === supplierNations("s_suppkey"))

    // Customers whose nation belongs to the selected region.
    val americaCustomers = nation
      .join(americaRegion, $"n_regionkey" === americaRegion("r_regionkey"))
      .select($"n_nationkey")
      .join(customer, $"n_nationkey" === customer("c_nationkey"))
      .select($"c_custkey")

    // Their orders inside the date window.
    val americaOrders = americaCustomers
      .join(ordersInWindow, $"c_custkey" === ordersInWindow("o_custkey"))
      .select($"o_orderkey", $"o_orderdate")

    // One row per qualifying line item: year, volume, and the BRAZIL-only volume.
    val yearlyVolumes = americaOrders
      .join(suppliedItems, $"o_orderkey" === suppliedItems("l_orderkey"))
      .select(
        yearOf($"o_orderdate").as("o_year"),
        $"volume",
        brazilOnly($"n_name", $"volume").as("case_volume"))

    yearlyVolumes
      .groupBy($"o_year")
      .agg(sum($"case_volume") / sum("volume"))
      .sort($"o_year")
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy