com.databricks.spark.sql.perf.tpcds.SimpleQueries.scala Maven / Gradle / Ivy
The newest version!
/*
* Copyright 2015 Databricks Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.databricks.spark.sql.perf.tpcds
import com.databricks.spark.sql.perf.{ExecutionMode, Benchmark}
trait SimpleQueries extends Benchmark {
import ExecutionMode._
val targetedPerfQueries = Seq(
// Query to measure scan performance.
("stores-sales-scan",
"""
|select * from store_sales where ss_item_sk = 1
""".stripMargin),
("fact-fact-join",
"""
| select count(*) from store_sales
| join store_returns
| on store_sales.ss_item_sk = store_returns.sr_item_sk
| and store_sales.ss_ticket_number = store_returns.sr_ticket_number
""".stripMargin)
).map { case (name, sqlText) =>
Query(name = name, sqlText = sqlText, description = "", executionMode = ForeachResults)
}
val q7Derived = Seq(
("q7-simpleScan",
"""
|select
| ss_quantity,
| ss_list_price,
| ss_coupon_amt,
| ss_coupon_amt,
| ss_cdemo_sk,
| ss_item_sk,
| ss_promo_sk,
| ss_sold_date_sk
|from store_sales
|where
| ss_sold_date_sk between 2450815 and 2451179
""".stripMargin),
("q7-twoMapJoins", """
|select
| i_item_id,
| ss_quantity,
| ss_list_price,
| ss_coupon_amt,
| ss_sales_price,
| ss_promo_sk,
| ss_sold_date_sk
|from
| store_sales
| join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
| join item on (store_sales.ss_item_sk = item.i_item_sk)
|where
| cd_gender = 'F'
| and cd_marital_status = 'W'
| and cd_education_status = 'Primary'
| and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
""".stripMargin),
("q7-fourMapJoins", """
|select
| i_item_id,
| ss_quantity,
| ss_list_price,
| ss_coupon_amt,
| ss_sales_price
|from
| store_sales
| join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join promotion on (store_sales.ss_promo_sk = promotion.p_promo_sk)
| join date_dim on (ss_sold_date_sk = d_date_sk)
|where
| cd_gender = 'F'
| and cd_marital_status = 'W'
| and cd_education_status = 'Primary'
| and (p_channel_email = 'N'
| or p_channel_event = 'N')
| and d_year = 1998
| -- and ss_date between '1998-01-01' and '1998-12-31'
| and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
""".stripMargin),
("q7-noOrderBy", """
|select
| i_item_id,
| avg(ss_quantity) agg1,
| avg(ss_list_price) agg2,
| avg(ss_coupon_amt) agg3,
| avg(ss_sales_price) agg4
|from
| store_sales
| join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join promotion on (store_sales.ss_promo_sk = promotion.p_promo_sk)
| join date_dim on (ss_sold_date_sk = d_date_sk)
|where
| cd_gender = 'F'
| and cd_marital_status = 'W'
| and cd_education_status = 'Primary'
| and (p_channel_email = 'N'
| or p_channel_event = 'N')
| and d_year = 1998
| -- and ss_date between '1998-01-01' and '1998-12-31'
| and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
|group by
| i_item_id
""".stripMargin),
("q7", """
|-- start query 1 in stream 0 using template query7.tpl
|select
| i_item_id,
| avg(ss_quantity) agg1,
| avg(ss_list_price) agg2,
| avg(ss_coupon_amt) agg3,
| avg(ss_sales_price) agg4
|from
| store_sales
| join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join promotion on (store_sales.ss_promo_sk = promotion.p_promo_sk)
| join date_dim on (ss_sold_date_sk = d_date_sk)
|where
| cd_gender = 'F'
| and cd_marital_status = 'W'
| and cd_education_status = 'Primary'
| and (p_channel_email = 'N'
| or p_channel_event = 'N')
| and d_year = 1998
| -- and ss_date between '1998-01-01' and '1998-12-31'
| and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
|group by
| i_item_id
|order by
| i_item_id
|limit 100
|-- end query 1 in stream 0 using template query7.tpl
""".stripMargin),
("store_sales-selfjoin-1", """
|-- The join condition will yield many matches.
|select
| t1.ss_quantity,
| t1.ss_list_price,
| t1.ss_coupon_amt,
| t1.ss_cdemo_sk,
| t1.ss_item_sk,
| t1.ss_promo_sk,
| t1.ss_sold_date_sk
|from store_sales t1 join store_sales t2 on t1.ss_item_sk = t2.ss_item_sk
|where
| t1.ss_sold_date_sk between 2450815 and 2451179
""".stripMargin),
("store_sales-selfjoin-2", """
|-- We ust comound primary key as the join condition. The size of output is comparable with the input table.
|select
| t1.ss_quantity,
| t1.ss_list_price,
| t1.ss_coupon_amt,
| t1.ss_cdemo_sk,
| t1.ss_item_sk,
| t1.ss_promo_sk,
| t1.ss_sold_date_sk
|from store_sales t1 join store_sales t2 on t1.ss_item_sk = t2.ss_item_sk and t1.ss_ticket_number = t2.ss_ticket_number
|where
| t1.ss_sold_date_sk between 2450815 and 2451179
""".stripMargin)
).map { case (name, sqlText) =>
Query(name = name, sqlText = sqlText, description = "", executionMode = ForeachResults)
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy