com.databricks.spark.sql.perf.tpcds.ImpalaKitQueries.scala Maven / Gradle / Ivy
/*
* Copyright 2015 Databricks Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.databricks.spark.sql.perf.tpcds
import com.databricks.spark.sql.perf.{ExecutionMode, Benchmark}
trait ImpalaKitQueries extends Benchmark {
import ExecutionMode._
// Queries are from
// https://github.com/cloudera/impala-tpcds-kit/tree/master/queries-sql92-modified/queries
val queries = Seq(
("q19", """
|-- start query 1 in stream 0 using template query19.tpl
|select
| i_brand_id,
| i_brand,
| i_manufact_id,
| i_manufact,
| sum(ss_ext_sales_price) ext_price
|from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join customer on (store_sales.ss_customer_sk = customer.c_customer_sk)
| join customer_address on (customer.c_current_addr_sk = customer_address.ca_address_sk)
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
|where
| --ss_date between '1999-11-01' and '1999-11-30'
| ss_sold_date_sk between 2451484 and 2451513
| and d_moy = 11
| and d_year = 1999
| and i_manager_id = 7
| and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
|group by
| i_brand,
| i_brand_id,
| i_manufact_id,
| i_manufact
|order by
| ext_price desc,
| i_brand,
| i_brand_id,
| i_manufact_id,
| i_manufact
|limit 100
|-- end query 1 in stream 0 using template query19.tpl
""".stripMargin),
("q27", """
|-- start query 1 in stream 0 using template query27.tpl
|select
| i_item_id,
| s_state,
| -- grouping(s_state) g_state,
| avg(ss_quantity) agg1,
| avg(ss_list_price) agg2,
| avg(ss_coupon_amt) agg3,
| avg(ss_sales_price) agg4
|from
| store_sales
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
|where
| -- ss_date between '1998-01-01' and '1998-12-31'
| ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
| and d_year = 1998
| and s_state in ('WI', 'CA', 'TX', 'FL', 'WA', 'TN')
| and cd_gender = 'F'
| and cd_marital_status = 'W'
| and cd_education_status = 'Primary'
|group by
| -- rollup(i_item_id, s_state)
| i_item_id,
| s_state
|order by
| i_item_id,
| s_state
|limit 100
|-- end query 1 in stream 0 using template query27.tpl
""".stripMargin),
("q3", """
|-- start query 1 in stream 0 using template query3.tpl
|select
| dt.d_year,
| -- year(ss_date) as d_year,
| -- case
| -- when ss_sold_date_sk between 2451149 and 2451179 then 1998
| -- when ss_sold_date_sk between 2451514 and 2451544 then 1999
| -- when ss_sold_date_sk between 2451880 and 2451910 then 2000
| -- when ss_sold_date_sk between 2452245 and 2452275 then 2001
| -- when ss_sold_date_sk between 2452610 and 2452640 then 2002
| -- end as d_year,
| item.i_brand_id brand_id,
| item.i_brand brand,
| sum(ss_ext_sales_price) sum_agg
|from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join date_dim dt on (dt.d_date_sk = store_sales.ss_sold_date_sk)
|where
| item.i_manufact_id = 436
| and dt.d_moy = 12
| -- and (ss_date between '1998-12-01' and '1998-12-31'
| -- or ss_date between '1999-12-01' and '1999-12-31'
| -- or ss_date between '2000-12-01' and '2000-12-31'
| -- or ss_date between '2001-12-01' and '2001-12-31'
| -- or ss_date between '2002-12-01' and '2002-12-31')
| and (ss_sold_date_sk between 2451149 and 2451179
| or ss_sold_date_sk between 2451514 and 2451544
| or ss_sold_date_sk between 2451880 and 2451910
| or ss_sold_date_sk between 2452245 and 2452275
| or ss_sold_date_sk between 2452610 and 2452640)
|group by
| d_year,
| item.i_brand,
| item.i_brand_id
|order by
| d_year,
| sum_agg desc,
| brand_id
|-- end query 1 in stream 0 using template query3.tpl
|limit 100
""".stripMargin),
("q34", """
|-- start query 1 in stream 0 using template query34.tpl
|select
| c_last_name,
| c_first_name,
| c_salutation,
| c_preferred_cust_flag,
| ss_ticket_number,
| cnt
|from
| (select
| ss_ticket_number,
| ss_customer_sk,
| count(*) cnt
| from
| store_sales
| join household_demographics on (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| date_dim.d_year in (1998, 1998 + 1, 1998 + 2)
| and (date_dim.d_dom between 1 and 3
| or date_dim.d_dom between 25 and 28)
| and (household_demographics.hd_buy_potential = '>10000'
| or household_demographics.hd_buy_potential = 'unknown')
| and household_demographics.hd_vehicle_count > 0
| and (case when household_demographics.hd_vehicle_count > 0 then household_demographics.hd_dep_count / household_demographics.hd_vehicle_count else null end) > 1.2
| and store.s_county in ('Saginaw County', 'Sumner County', 'Appanoose County', 'Daviess County', 'Fairfield County', 'Raleigh County', 'Ziebach County', 'Williamson County')
| and ss_sold_date_sk between 2450816 and 2451910 -- partition key filter
| group by
| ss_ticket_number,
| ss_customer_sk
| ) dn
|join customer on (dn.ss_customer_sk = customer.c_customer_sk)
|where
| cnt between 15 and 20
|order by
| c_last_name,
| c_first_name,
| c_salutation,
| c_preferred_cust_flag desc,
| ss_ticket_number,
| cnt
|limit 1000
|-- end query 1 in stream 0 using template query34.tpl
""".stripMargin),
("q42", """
|-- start query 1 in stream 0 using template query42.tpl
|select
| d_year,
| i_category_id,
| i_category,
| sum(ss_ext_sales_price) as total_price
|from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join date_dim dt on (dt.d_date_sk = store_sales.ss_sold_date_sk)
|where
| item.i_manager_id = 1
| and dt.d_moy = 12
| and dt.d_year = 1998
| -- and ss_date between '1998-12-01' and '1998-12-31'
| and ss_sold_date_sk between 2451149 and 2451179 -- partition key filter
|group by
| d_year,
| i_category_id,
| i_category
|order by
| -- sum(ss_ext_sales_price) desc,
| total_price desc,
| d_year,
| i_category_id,
| i_category
|limit 100
|-- end query 1 in stream 0 using template query42.tpl
""".stripMargin),
("q43", """
|-- start query 1 in stream 0 using template query43.tpl
|select
| s_store_name,
| s_store_id,
| sum(case when (d_day_name = 'Sunday') then ss_sales_price else null end) sun_sales,
| sum(case when (d_day_name = 'Monday') then ss_sales_price else null end) mon_sales,
| sum(case when (d_day_name = 'Tuesday') then ss_sales_price else null end) tue_sales,
| sum(case when (d_day_name = 'Wednesday') then ss_sales_price else null end) wed_sales,
| sum(case when (d_day_name = 'Thursday') then ss_sales_price else null end) thu_sales,
| sum(case when (d_day_name = 'Friday') then ss_sales_price else null end) fri_sales,
| sum(case when (d_day_name = 'Saturday') then ss_sales_price else null end) sat_sales
|from
| store_sales
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
|where
| s_gmt_offset = -5
| and d_year = 1998
| -- and ss_date between '1998-01-01' and '1998-12-31'
| and ss_sold_date_sk between 2450816 and 2451179 -- partition key filter
|group by
| s_store_name,
| s_store_id
|order by
| s_store_name,
| s_store_id,
| sun_sales,
| mon_sales,
| tue_sales,
| wed_sales,
| thu_sales,
| fri_sales,
| sat_sales
|limit 100
|-- end query 1 in stream 0 using template query43.tpl
""".stripMargin),
("q46", """
|-- start query 1 in stream 0 using template query46.tpl
|select
| c_last_name,
| c_first_name,
| ca_city,
| bought_city,
| ss_ticket_number,
| amt,
| profit
|from
| (select
| ss_ticket_number,
| ss_customer_sk,
| ca_city bought_city,
| sum(ss_coupon_amt) amt,
| sum(ss_net_profit) profit
| from
| store_sales
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| join household_demographics on (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| join customer_address on (store_sales.ss_addr_sk = customer_address.ca_address_sk)
| where
| store.s_city in ('Midway', 'Concord', 'Spring Hill', 'Brownsville', 'Greenville')
| and (household_demographics.hd_dep_count = 5
| or household_demographics.hd_vehicle_count = 3)
| and date_dim.d_dow in (6, 0)
| and date_dim.d_year in (1999, 1999 + 1, 1999 + 2)
| -- and ss_date between '1999-01-01' and '2001-12-31'
| -- and ss_sold_date_sk between 2451180 and 2452275 -- partition key filter
| and ss_sold_date_sk in (2451181, 2451182, 2451188, 2451189, 2451195, 2451196, 2451202, 2451203, 2451209, 2451210, 2451216, 2451217,
| 2451223, 2451224, 2451230, 2451231, 2451237, 2451238, 2451244, 2451245, 2451251, 2451252, 2451258, 2451259,
| 2451265, 2451266, 2451272, 2451273, 2451279, 2451280, 2451286, 2451287, 2451293, 2451294, 2451300, 2451301,
| 2451307, 2451308, 2451314, 2451315, 2451321, 2451322, 2451328, 2451329, 2451335, 2451336, 2451342, 2451343,
| 2451349, 2451350, 2451356, 2451357, 2451363, 2451364, 2451370, 2451371, 2451377, 2451378, 2451384, 2451385,
| 2451391, 2451392, 2451398, 2451399, 2451405, 2451406, 2451412, 2451413, 2451419, 2451420, 2451426, 2451427,
| 2451433, 2451434, 2451440, 2451441, 2451447, 2451448, 2451454, 2451455, 2451461, 2451462, 2451468, 2451469,
| 2451475, 2451476, 2451482, 2451483, 2451489, 2451490, 2451496, 2451497, 2451503, 2451504, 2451510, 2451511,
| 2451517, 2451518, 2451524, 2451525, 2451531, 2451532, 2451538, 2451539, 2451545, 2451546, 2451552, 2451553,
| 2451559, 2451560, 2451566, 2451567, 2451573, 2451574, 2451580, 2451581, 2451587, 2451588, 2451594, 2451595,
| 2451601, 2451602, 2451608, 2451609, 2451615, 2451616, 2451622, 2451623, 2451629, 2451630, 2451636, 2451637,
| 2451643, 2451644, 2451650, 2451651, 2451657, 2451658, 2451664, 2451665, 2451671, 2451672, 2451678, 2451679,
| 2451685, 2451686, 2451692, 2451693, 2451699, 2451700, 2451706, 2451707, 2451713, 2451714, 2451720, 2451721,
| 2451727, 2451728, 2451734, 2451735, 2451741, 2451742, 2451748, 2451749, 2451755, 2451756, 2451762, 2451763,
| 2451769, 2451770, 2451776, 2451777, 2451783, 2451784, 2451790, 2451791, 2451797, 2451798, 2451804, 2451805,
| 2451811, 2451812, 2451818, 2451819, 2451825, 2451826, 2451832, 2451833, 2451839, 2451840, 2451846, 2451847,
| 2451853, 2451854, 2451860, 2451861, 2451867, 2451868, 2451874, 2451875, 2451881, 2451882, 2451888, 2451889,
| 2451895, 2451896, 2451902, 2451903, 2451909, 2451910, 2451916, 2451917, 2451923, 2451924, 2451930, 2451931,
| 2451937, 2451938, 2451944, 2451945, 2451951, 2451952, 2451958, 2451959, 2451965, 2451966, 2451972, 2451973,
| 2451979, 2451980, 2451986, 2451987, 2451993, 2451994, 2452000, 2452001, 2452007, 2452008, 2452014, 2452015,
| 2452021, 2452022, 2452028, 2452029, 2452035, 2452036, 2452042, 2452043, 2452049, 2452050, 2452056, 2452057,
| 2452063, 2452064, 2452070, 2452071, 2452077, 2452078, 2452084, 2452085, 2452091, 2452092, 2452098, 2452099,
| 2452105, 2452106, 2452112, 2452113, 2452119, 2452120, 2452126, 2452127, 2452133, 2452134, 2452140, 2452141,
| 2452147, 2452148, 2452154, 2452155, 2452161, 2452162, 2452168, 2452169, 2452175, 2452176, 2452182, 2452183,
| 2452189, 2452190, 2452196, 2452197, 2452203, 2452204, 2452210, 2452211, 2452217, 2452218, 2452224, 2452225,
| 2452231, 2452232, 2452238, 2452239, 2452245, 2452246, 2452252, 2452253, 2452259, 2452260, 2452266, 2452267,
| 2452273, 2452274)
| group by
| ss_ticket_number,
| ss_customer_sk,
| ss_addr_sk,
| ca_city
| ) dn
| join customer on (dn.ss_customer_sk = customer.c_customer_sk)
| join customer_address current_addr on (customer.c_current_addr_sk = current_addr.ca_address_sk)
|where
| current_addr.ca_city <> bought_city
|order by
| c_last_name,
| c_first_name,
| ca_city,
| bought_city,
| ss_ticket_number
|limit 100
|-- end query 1 in stream 0 using template query46.tpl
""".stripMargin),
("q52", """
|-- start query 1 in stream 0 using template query52.tpl
|select
| d_year,
| i_brand_id,
| i_brand,
| sum(ss_ext_sales_price) ext_price
|from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join date_dim dt on (store_sales.ss_sold_date_sk = dt.d_date_sk)
|where
| i_manager_id = 1
| and d_moy = 12
| and d_year = 1998
| -- and ss_date between '1998-12-01' and '1998-12-31'
| and ss_sold_date_sk between 2451149 and 2451179 -- partition key filter
|group by
| d_year,
| i_brand,
| i_brand_id
|order by
| d_year,
| ext_price desc,
| i_brand_id
|limit 100
|-- end query 1 in stream 0 using template query52.tpl
""".stripMargin),
("q53", """
|-- start query 1 in stream 0 using template query53.tpl
|select
| *
|from
| (select
| i_manufact_id,
| sum(ss_sales_price) sum_sales
| -- avg(sum(ss_sales_price)) over(partition by i_manufact_id) avg_quarterly_sales
| from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| ss_sold_date_sk between 2451911 and 2452275 -- partition key filter
| -- ss_date between '2001-01-01' and '2001-12-31'
| and d_month_seq in(1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
| and (
| (i_category in('Books', 'Children', 'Electronics')
| and i_class in('personal', 'portable', 'reference', 'self-help')
| and i_brand in('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')
| )
| or
| (i_category in('Women', 'Music', 'Men')
| and i_class in('accessories', 'classical', 'fragrances', 'pants')
| and i_brand in('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')
| )
| )
| group by
| i_manufact_id,
| d_qoy
| ) tmp1
|-- where
|-- case when avg_quarterly_sales > 0 then abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales else null end > 0.1
|order by
| -- avg_quarterly_sales,
| sum_sales,
| i_manufact_id
|limit 100
|-- end query 1 in stream 0 using template query53.tpl
""".stripMargin),
("q55", """
|-- start query 1 in stream 0 using template query55.tpl
|select
| i_brand_id,
| i_brand,
| sum(ss_ext_sales_price) ext_price
|from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
|where
| i_manager_id = 36
| and d_moy = 12
| and d_year = 2001
| -- and ss_date between '2001-12-01' and '2001-12-31'
| and ss_sold_date_sk between 2452245 and 2452275 -- partition key filter
|group by
| i_brand,
| i_brand_id
|order by
| ext_price desc,
| i_brand_id
|limit 100
|-- end query 1 in stream 0 using template query55.tpl
""".stripMargin),
("q59", """
|-- start query 1 in stream 0 using template query59.tpl
|select
| s_store_name1,
| s_store_id1,
| d_week_seq1,
| sun_sales1 / sun_sales2,
| mon_sales1 / mon_sales2,
| tue_sales1 / tue_sales2,
| wed_sales1 / wed_sales2,
| thu_sales1 / thu_sales2,
| fri_sales1 / fri_sales2,
| sat_sales1 / sat_sales2
|from
| (select
| s_store_name s_store_name1,
| wss.d_week_seq d_week_seq1,
| s_store_id s_store_id1,
| sun_sales sun_sales1,
| mon_sales mon_sales1,
| tue_sales tue_sales1,
| wed_sales wed_sales1,
| thu_sales thu_sales1,
| fri_sales fri_sales1,
| sat_sales sat_sales1
| from
| (select
| d_week_seq,
| ss_store_sk,
| sum(case when(d_day_name = 'Sunday') then ss_sales_price else null end) sun_sales,
| sum(case when(d_day_name = 'Monday') then ss_sales_price else null end) mon_sales,
| sum(case when(d_day_name = 'Tuesday') then ss_sales_price else null end) tue_sales,
| sum(case when(d_day_name = 'Wednesday') then ss_sales_price else null end) wed_sales,
| sum(case when(d_day_name = 'Thursday') then ss_sales_price else null end) thu_sales,
| sum(case when(d_day_name = 'Friday') then ss_sales_price else null end) fri_sales,
| sum(case when(d_day_name = 'Saturday') then ss_sales_price else null end) sat_sales
| from
| store_sales
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| -- ss_date between '1998-10-01' and '1999-09-30'
| ss_sold_date_sk between 2451088 and 2451452
| group by
| d_week_seq,
| ss_store_sk
| ) wss
| join store on (wss.ss_store_sk = store.s_store_sk)
| join date_dim d on (wss.d_week_seq = d.d_week_seq)
| where
| d_month_seq between 1185 and 1185 + 11
| ) y
| join
| (select
| s_store_name s_store_name2,
| wss.d_week_seq d_week_seq2,
| s_store_id s_store_id2,
| sun_sales sun_sales2,
| mon_sales mon_sales2,
| tue_sales tue_sales2,
| wed_sales wed_sales2,
| thu_sales thu_sales2,
| fri_sales fri_sales2,
| sat_sales sat_sales2
| from
| (select
| d_week_seq,
| ss_store_sk,
| sum(case when(d_day_name = 'Sunday') then ss_sales_price else null end) sun_sales,
| sum(case when(d_day_name = 'Monday') then ss_sales_price else null end) mon_sales,
| sum(case when(d_day_name = 'Tuesday') then ss_sales_price else null end) tue_sales,
| sum(case when(d_day_name = 'Wednesday') then ss_sales_price else null end) wed_sales,
| sum(case when(d_day_name = 'Thursday') then ss_sales_price else null end) thu_sales,
| sum(case when(d_day_name = 'Friday') then ss_sales_price else null end) fri_sales,
| sum(case when(d_day_name = 'Saturday') then ss_sales_price else null end) sat_sales
| from
| store_sales
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| -- ss_date between '1999-10-01' and '2000-09-30'
| ss_sold_date_sk between 2451088 and 2451452
| group by
| d_week_seq,
| ss_store_sk
| ) wss
| join store on (wss.ss_store_sk = store.s_store_sk)
| join date_dim d on (wss.d_week_seq = d.d_week_seq)
| where
| d_month_seq between 1185 + 12 and 1185 + 23
| ) x
| on (y.s_store_id1 = x.s_store_id2)
|where
| d_week_seq1 = d_week_seq2 - 52
|order by
| s_store_name1,
| s_store_id1,
| d_week_seq1
|limit 100
|-- end query 1 in stream 0 using template query59.tpl
""".stripMargin),
("q63", """
|-- start query 1 in stream 0 using template query63.tpl
|select
| *
|from
| (select
| i_manager_id,
| sum(ss_sales_price) sum_sales
| -- avg(sum(ss_sales_price)) over(partition by i_manager_id) avg_monthly_sales
| from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| ss_sold_date_sk between 2451911 and 2452275 -- partition key filter
| -- ss_date between '2001-01-01' and '2001-12-31'
| and d_month_seq in (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
| and (
| (i_category in('Books', 'Children', 'Electronics')
| and i_class in('personal', 'portable', 'refernece', 'self-help')
| and i_brand in('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')
| )
| or
| (i_category in('Women', 'Music', 'Men')
| and i_class in('accessories', 'classical', 'fragrances', 'pants')
| and i_brand in('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')
| )
| )
| group by
| i_manager_id,
| d_moy
| ) tmp1
|-- where
|-- case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
|order by
| i_manager_id,
| -- avg_monthly_sales,
| sum_sales
|limit 100
|-- end query 1 in stream 0 using template query63.tpl
""".stripMargin),
("q65", """
|--q65
|-- start query 1 in stream 0 using template query65.tpl
|select
| s_store_name,
| i_item_desc,
| sc.revenue,
| i_current_price,
| i_wholesale_cost,
| i_brand
|from
| (select
| ss_store_sk,
| ss_item_sk,
| sum(ss_sales_price) as revenue
| from
| store_sales
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| -- ss_date between '2001-01-01' and '2001-12-31'
| ss_sold_date_sk between 2451911 and 2452275 -- partition key filter
| and d_month_seq between 1212 and 1212 + 11
| group by
| ss_store_sk,
| ss_item_sk
| ) sc
| join item on (sc.ss_item_sk = item.i_item_sk)
| join store on (sc.ss_store_sk = store.s_store_sk)
| join
| (select
| ss_store_sk,
| avg(revenue) as ave
| from
| (select
| ss_store_sk,
| ss_item_sk,
| sum(ss_sales_price) as revenue
| from
| store_sales
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| -- ss_date between '2001-01-01' and '2001-12-31'
| ss_sold_date_sk between 2451911 and 2452275 -- partition key filter
| and d_month_seq between 1212 and 1212 + 11
| group by
| ss_store_sk,
| ss_item_sk
| ) sa
| group by
| ss_store_sk
| ) sb on (sc.ss_store_sk = sb.ss_store_sk) -- 676 rows
|where
| sc.revenue <= 0.1 * sb.ave
|order by
| s_store_name,
| i_item_desc
|limit 100
|-- end query 1 in stream 0 using template query65.tpl
""".stripMargin),
("q68", """
|-- start query 1 in stream 0 using template query68.tpl
|select
| c_last_name,
| c_first_name,
| ca_city,
| bought_city,
| ss_ticket_number,
| extended_price,
| extended_tax,
| list_price
|from
| (select
| ss_ticket_number,
| ss_customer_sk,
| ca_city bought_city,
| sum(ss_ext_sales_price) extended_price,
| sum(ss_ext_list_price) list_price,
| sum(ss_ext_tax) extended_tax
| from
| store_sales
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| join household_demographics on (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| join customer_address on (store_sales.ss_addr_sk = customer_address.ca_address_sk)
| where
| store.s_city in('Midway', 'Fairview')
| --and date_dim.d_dom between 1 and 2
| --and date_dim.d_year in(1999, 1999 + 1, 1999 + 2)
| -- and ss_date between '1999-01-01' and '2001-12-31'
| -- and dayofmonth(ss_date) in (1,2)
| -- and ss_sold_date_sk in (2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 2451331,
| -- 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 2451485,
| -- 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 2451666,
| -- 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 2451820,
| -- 2451850, 2451851, 2451880, 2451881, 2451911, 2451912, 2451942, 2451943, 2451970, 2451971, 2452001,
| -- 2452002, 2452031, 2452032, 2452062, 2452063, 2452092, 2452093, 2452123, 2452124, 2452154, 2452155,
| -- 2452184, 2452185, 2452215, 2452216, 2452245, 2452246)
| and (household_demographics.hd_dep_count = 5
| or household_demographics.hd_vehicle_count = 3)
| and d_date between '1999-01-01' and '1999-03-31'
| and ss_sold_date_sk between 2451180 and 2451269 -- partition key filter (3 months)
| group by
| ss_ticket_number,
| ss_customer_sk,
| ss_addr_sk,
| ca_city
| ) dn
| join customer on (dn.ss_customer_sk = customer.c_customer_sk)
| join customer_address current_addr on (customer.c_current_addr_sk = current_addr.ca_address_sk)
|where
| current_addr.ca_city <> bought_city
|order by
| c_last_name,
| ss_ticket_number
|limit 100
|-- end query 1 in stream 0 using template query68.tpl
""".stripMargin),
("q7", """
|-- start query 1 in stream 0 using template query7.tpl
|select
| i_item_id,
| avg(ss_quantity) agg1,
| avg(ss_list_price) agg2,
| avg(ss_coupon_amt) agg3,
| avg(ss_sales_price) agg4
|from
| store_sales
| join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join promotion on (store_sales.ss_promo_sk = promotion.p_promo_sk)
| join date_dim on (ss_sold_date_sk = d_date_sk)
|where
| cd_gender = 'F'
| and cd_marital_status = 'W'
| and cd_education_status = 'Primary'
| and (p_channel_email = 'N'
| or p_channel_event = 'N')
| and d_year = 1998
| -- and ss_date between '1998-01-01' and '1998-12-31'
| and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
|group by
| i_item_id
|order by
| i_item_id
|limit 100
|-- end query 1 in stream 0 using template query7.tpl
""".stripMargin),
("q73", """
|-- start query 1 in stream 0 using template query73.tpl
|select
| c_last_name,
| c_first_name,
| c_salutation,
| c_preferred_cust_flag,
| ss_ticket_number,
| cnt
|from
| (select
| ss_ticket_number,
| ss_customer_sk,
| count(*) cnt
| from
| store_sales
| join household_demographics on (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| -- join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| store.s_county in ('Williamson County','Franklin Parish','Bronx County','Orange County')
| -- and date_dim.d_dom between 1 and 2
| -- and date_dim.d_year in(1998, 1998 + 1, 1998 + 2)
| -- and ss_date between '1999-01-01' and '2001-12-02'
| -- and dayofmonth(ss_date) in (1,2)
| -- partition key filter
| -- and ss_sold_date_sk in (2450816, 2450846, 2450847, 2450874, 2450875, 2450905, 2450906, 2450935, 2450936, 2450966, 2450967,
| -- 2450996, 2450997, 2451027, 2451028, 2451058, 2451059, 2451088, 2451089, 2451119, 2451120, 2451149,
| -- 2451150, 2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301,
| -- 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484,
| -- 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637,
| -- 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819,
| -- 2451820, 2451850, 2451851, 2451880, 2451881)
| and (household_demographics.hd_buy_potential = '>10000'
| or household_demographics.hd_buy_potential = 'unknown')
| and household_demographics.hd_vehicle_count > 0
| and case when household_demographics.hd_vehicle_count > 0 then household_demographics.hd_dep_count / household_demographics.hd_vehicle_count else null end > 1
| and ss_sold_date_sk between 2451180 and 2451269 -- partition key filter (3 months)
| group by
| ss_ticket_number,
| ss_customer_sk
| ) dj
| join customer on (dj.ss_customer_sk = customer.c_customer_sk)
|where
| cnt between 1 and 5
|order by
| cnt desc
|limit 1000
|-- end query 1 in stream 0 using template query73.tpl
""".stripMargin),
("q79", """
|-- start query 1 in stream 0 using template query79.tpl
|select
| c_last_name,
| c_first_name,
| substr(s_city, 1, 30) as city,
| ss_ticket_number,
| amt,
| profit
|from
| (select
| ss_ticket_number,
| ss_customer_sk,
| s_city,
| sum(ss_coupon_amt) amt,
| sum(ss_net_profit) profit
| from
| store_sales
| join household_demographics on (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| where
| store.s_number_employees between 200 and 295
| and (household_demographics.hd_dep_count = 8
| or household_demographics.hd_vehicle_count > 0)
| and date_dim.d_dow = 1
| and date_dim.d_year in (1998, 1998 + 1, 1998 + 2)
| -- and ss_date between '1998-01-01' and '2000-12-25'
| -- 156 days
| and d_date between '1999-01-01' and '1999-03-31'
| and ss_sold_date_sk between 2451180 and 2451269 -- partition key filter
| group by
| ss_ticket_number,
| ss_customer_sk,
| ss_addr_sk,
| s_city
| ) ms
| join customer on (ms.ss_customer_sk = customer.c_customer_sk)
|order by
| c_last_name,
| c_first_name,
| -- substr(s_city, 1, 30),
| city,
| profit
|limit 100
|-- end query 1 in stream 0 using template query79.tpl
""".stripMargin),
("q8", """
|
|-- start query 1 in stream 0 using template query8.tpl
|select
| s_store_name,
| sum(ss_net_profit)
|from
| store_sales
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| -- join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| join
| (select
| a.ca_zip
| from
| (select
| substr(ca_zip, 1, 5) ca_zip,
| count( *) cnt
| from
| customer_address
| join customer on (customer_address.ca_address_sk = customer.c_current_addr_sk)
| where
| c_preferred_cust_flag = 'Y'
| group by
| ca_zip
| having
| count( *) > 10
| ) a
| left semi join
| (select
| substr(ca_zip, 1, 5) ca_zip
| from
| customer_address
| where
| substr(ca_zip, 1, 5) in ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779',
| '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797',
| '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068',
| '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053',
| '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425',
| '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835',
| '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360',
| '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982',
| '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447',
| '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792',
| '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561',
| '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458',
| '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648',
| '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799',
| '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036',
| '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177',
| '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121',
| '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560',
| '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619',
| '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010',
| '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705',
| '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928',
| '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047',
| '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998',
| '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744',
| '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309',
| '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622',
| '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936',
| '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492')
| ) b
| on (a.ca_zip = b.ca_zip)
| ) v1 on (substr(store.s_zip, 1, 2) = substr(v1.ca_zip, 1, 2))
|where
| ss_date between '2002-01-01' and '2002-04-01'
| -- and d_qoy = 1
| -- and d_year = 2002
|group by
| s_store_name
|order by
| s_store_name
|limit 100;
|-- end query 1 in stream 0 using template query8.tpl
""".stripMargin),
("q82", """
|-- start query 1 in stream 0 using template query82.tpl
|select
| i_item_id,
| i_item_desc,
| i_current_price
|from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join inventory on (item.i_item_sk = inventory.inv_item_sk)
| -- join date_dim on (inventory.inv_date_sk = date_dim.d_date_sk)
|where
| i_current_price between 30 and 30 + 30
| and i_manufact_id in (437, 129, 727, 663)
| and inv_quantity_on_hand between 100 and 500
| and inv_date between '2002-05-30' and '2002-07-29'
| -- and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60)
|group by
| i_item_id,
| i_item_desc,
| i_current_price
|order by
| i_item_id
|limit 100
|-- end query 1 in stream 0 using template query82.tpl
""".stripMargin),
("q89", """
|-- start query 1 in stream 0 using template query89.tpl
|select
| *
|from
| (select
| i_category,
| i_class,
| i_brand,
| s_store_name,
| s_company_name,
| d_moy,
| sum(ss_sales_price) sum_sales
| -- avg(sum(ss_sales_price)) over (partition by i_category, i_brand, s_store_name, s_company_name) avg_monthly_sales
| from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join store on (store_sales.ss_store_sk = store.s_store_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| -- ss_date between '2000-01-01' and '2000-12-31'
| ss_sold_date_sk between 2451545 and 2451910 -- partition key filter
| and d_year in (2000)
| and ((i_category in('Home', 'Books', 'Electronics')
| and i_class in('wallpaper', 'parenting', 'musical'))
| or (i_category in('Shoes', 'Jewelry', 'Men')
| and i_class in('womens', 'birdal', 'pants'))
| )
| group by
| i_category,
| i_class,
| i_brand,
| s_store_name,
| s_company_name,
| d_moy
| ) tmp1
|-- where
|-- case when(avg_monthly_sales <> 0) then(abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1
|order by
| -- sum_sales - avg_monthly_sales,
| sum_sales,
| s_store_name
|limit 100
|-- end query 1 in stream 0 using template query89.tpl
""".stripMargin),
("q98", """
|-- start query 1 in stream 0 using template query98.tpl
|select
| i_item_desc,
| i_category,
| i_class,
| i_current_price,
| sum(ss_ext_sales_price) as itemrevenue
| -- sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) over (partition by i_class) as revenueratio
|from
| store_sales
| join item on (store_sales.ss_item_sk = item.i_item_sk)
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
|where
| i_category in('Jewelry', 'Sports', 'Books')
| -- and d_date between cast('2001-01-12' as date) and (cast('2001-01-12' as date) + 30)
| -- and d_date between '2001-01-12' and '2001-02-11'
| -- and ss_date between '2001-01-12' and '2001-02-11'
| -- and ss_sold_date_sk between 2451922 and 2451952 -- partition key filter
| and ss_sold_date_sk between 2451911 and 2451941 -- partition key filter (1 calendar month)
| and d_date between '2001-01-01' and '2001-01-31'
|group by
| i_item_id,
| i_item_desc,
| i_category,
| i_class,
| i_current_price
|order by
| i_category,
| i_class,
| i_item_id,
| i_item_desc
| -- revenueratio
|limit 1000
|-- end query 1 in stream 0 using template query98.tpl
""".stripMargin),
("ss_max", """
|select
| count(*) as total,
| count(ss_sold_date_sk) as not_null_total,
| count(distinct ss_sold_date_sk) as unique_days,
| max(ss_sold_date_sk) as max_ss_sold_date_sk,
| max(ss_sold_time_sk) as max_ss_sold_time_sk,
| max(ss_item_sk) as max_ss_item_sk,
| max(ss_customer_sk) as max_ss_customer_sk,
| max(ss_cdemo_sk) as max_ss_cdemo_sk,
| max(ss_hdemo_sk) as max_ss_hdemo_sk,
| max(ss_addr_sk) as max_ss_addr_sk,
| max(ss_store_sk) as max_ss_store_sk,
| max(ss_promo_sk) as max_ss_promo_sk
|from store_sales
""".stripMargin)
).map {
case (name, sqlText) => Query(name, sqlText, description = "", executionMode = CollectResults)
}
val queriesMap = queries.map(q => q.name -> q).toMap
val originalQueries = Seq(
("q3", """
select d_year
,item.i_brand_id brand_id
,item.i_brand brand
,sum(ss_ext_sales_price) sum_agg
from date_dim dt
JOIN store_sales on dt.d_date_sk = store_sales.ss_sold_date_sk
JOIN item on store_sales.ss_item_sk = item.i_item_sk
where
item.i_manufact_id = 436
and dt.d_moy=12
group by d_year
,item.i_brand
,item.i_brand_id
order by d_year
,sum_agg desc
,brand_id
limit 100"""),
("q7", """
select i_item_id,
avg(ss_quantity) agg1,
avg(ss_list_price) agg2,
avg(ss_coupon_amt) agg3,
avg(ss_sales_price) agg4
from store_sales
JOIN customer_demographics ON store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk
JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk
JOIN item ON store_sales.ss_item_sk = item.i_item_sk
JOIN promotion ON store_sales.ss_promo_sk = promotion.p_promo_sk
where
cd_gender = 'F' and
cd_marital_status = 'W' and
cd_education_status = 'Primary' and
(p_channel_email = 'N' or p_channel_event = 'N') and
d_year = 1998
group by i_item_id
order by i_item_id
limit 100"""),
("q19", """
select i_brand_id, i_brand, i_manufact_id, i_manufact,
sum(ss_ext_sales_price) as ext_price
from date_dim
JOIN store_sales ON date_dim.d_date_sk = store_sales.ss_sold_date_sk
JOIN item ON store_sales.ss_item_sk = item.i_item_sk
JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk
JOIN customer_address ON customer.c_current_addr_sk = customer_address.ca_address_sk
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
where
i_manager_id=7
and d_moy=11
and d_year=1999
and substr(ca_zip,1,5) <> substr(s_zip,1,5)
group by i_brand
,i_brand_id
,i_manufact_id
,i_manufact
order by ext_price desc
,i_brand
,i_brand_id
,i_manufact_id
,i_manufact
limit 100"""),
("q27", """
select i_item_id,
s_state,
avg(ss_quantity) agg1,
avg(ss_list_price) agg2,
avg(ss_coupon_amt) agg3,
avg(ss_sales_price) agg4
from store_sales
JOIN customer_demographics ON store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk
JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN item ON store_sales.ss_item_sk = item.i_item_sk
where
cd_gender = 'F' and
cd_marital_status = 'W' and
cd_education_status = 'Primary' and
d_year = 1998 and
s_state = 'TN'
group by i_item_id, s_state
order by i_item_id
,s_state
limit 100"""),
("q34", """
select c_last_name
,c_first_name
,c_salutation
,c_preferred_cust_flag
,ss_ticket_number
,cnt from
(select ss_ticket_number
,ss_customer_sk
,count(*) cnt
from store_sales
JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
where
(date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28)
and (household_demographics.hd_buy_potential = '>10000' or
household_demographics.hd_buy_potential = 'unknown')
and household_demographics.hd_vehicle_count > 0
and (case when household_demographics.hd_vehicle_count > 0
then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count
else null
end) > 1.2
and date_dim.d_year in (1998,1998+1,1998+2)
and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County',
'Williamson County','Williamson County','Williamson County','Williamson County')
group by ss_ticket_number,ss_customer_sk) dn
JOIN customer ON dn.ss_customer_sk = customer.c_customer_sk
WHERE
cnt between 15 and 20
order by
c_last_name,
c_first_name,
c_salutation,
c_preferred_cust_flag desc,
ss_ticket_number,
cnt"""),
("q42", """
select d_year
,item.i_category_id
,item.i_category
,sum(ss_ext_sales_price) as s
from date_dim dt
JOIN store_sales ON dt.d_date_sk = store_sales.ss_sold_date_sk
JOIN item ON store_sales.ss_item_sk = item.i_item_sk
where
item.i_manager_id = 1
and dt.d_moy=12
and dt.d_year=1998
group by d_year
,item.i_category_id
,item.i_category
order by s desc,d_year
,i_category_id
,i_category
limit 100"""),
("q43", """
select s_store_name, s_store_id,
sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales,
sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales,
sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales,
sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales,
sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales,
sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales,
sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales
from date_dim
JOIN store_sales ON date_dim.d_date_sk = store_sales.ss_sold_date_sk
JOIN store ON store.s_store_sk = store_sales.ss_store_sk
where
s_gmt_offset = -5 and
d_year = 1998
group by s_store_name, s_store_id
order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales
limit 100"""),
("q46", """
select c_last_name
,c_first_name
,ca_city
,bought_city
,ss_ticket_number
,amt,profit
from
(select ss_ticket_number
,ss_customer_sk
,ca_city as bought_city
,sum(ss_coupon_amt) as amt
,sum(ss_net_profit) as profit
from store_sales
JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
JOIN customer_address ON store_sales.ss_addr_sk = customer_address.ca_address_sk
where
(household_demographics.hd_dep_count = 5 or
household_demographics.hd_vehicle_count= 3)
and date_dim.d_dow in (6,0)
and date_dim.d_year in (1999,1999+1,1999+2)
and store.s_city in ('Midway','Fairview','Fairview','Fairview','Fairview')
group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn
JOIN customer ON dn.ss_customer_sk = customer.c_customer_sk
JOIN customer_address ON customer.c_current_addr_sk = customer_address.ca_address_sk
where
customer_address.ca_city <> dn.bought_city
order by c_last_name
,c_first_name
,ca_city
,bought_city
,ss_ticket_number
limit 100"""),
("q52", """
select d_year
,item.i_brand_id brand_id
,item.i_brand brand
,sum(ss_ext_sales_price) as ext_price
from date_dim
JOIN store_sales ON date_dim.d_date_sk = store_sales.ss_sold_date_sk
JOIN item ON store_sales.ss_item_sk = item.i_item_sk
where
item.i_manager_id = 1
and date_dim.d_moy=12
and date_dim.d_year=1998
group by d_year
,item.i_brand
,item.i_brand_id
order by d_year
,ext_price desc
,brand_id
limit 100"""),
("q55", """
select i_brand_id as brand_id, i_brand as brand,
sum(store_sales.ss_ext_sales_price) ext_price
from date_dim
JOIN store_sales ON date_dim.d_date_sk = store_sales.ss_sold_date_sk
JOIN item ON store_sales.ss_item_sk = item.i_item_sk
where
i_manager_id=36
and d_moy=12
and d_year=2001
group by i_brand, i_brand_id
order by ext_price desc, brand_id
limit 100 """),
("q59",
"""
|select
| s_store_name1,
| s_store_id1,
| d_week_seq1,
| sun_sales1 / sun_sales2,
| mon_sales1 / mon_sales2,
| tue_sales1 / tue_sales2,
| wed_sales1 / wed_sales2,
| thu_sales1 / thu_sales2,
| fri_sales1 / fri_sales2,
| sat_sales1 / sat_sales2
|from
| (select
| /*+ MAPJOIN(store, date_dim) */
| s_store_name s_store_name1,
| wss.d_week_seq d_week_seq1,
| s_store_id s_store_id1,
| sun_sales sun_sales1,
| mon_sales mon_sales1,
| tue_sales tue_sales1,
| wed_sales wed_sales1,
| thu_sales thu_sales1,
| fri_sales fri_sales1,
| sat_sales sat_sales1
| from
| (select
| /*+ MAPJOIN(date_dim) */
| d_week_seq,
| ss_store_sk,
| sum(case when(d_day_name = 'Sunday') then ss_sales_price else null end) sun_sales,
| sum(case when(d_day_name = 'Monday') then ss_sales_price else null end) mon_sales,
| sum(case when(d_day_name = 'Tuesday') then ss_sales_price else null end) tue_sales,
| sum(case when(d_day_name = 'Wednesday') then ss_sales_price else null end) wed_sales,
| sum(case when(d_day_name = 'Thursday') then ss_sales_price else null end) thu_sales,
| sum(case when(d_day_name = 'Friday') then ss_sales_price else null end) fri_sales,
| sum(case when(d_day_name = 'Saturday') then ss_sales_price else null end) sat_sales
| from
| store_sales
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| -- ss_date between '1998-10-01' and '1999-09-30'
| ss_sold_date_sk between 2451088 and 2451452
| group by
| d_week_seq,
| ss_store_sk
| ) wss
| join store on (wss.ss_store_sk = store.s_store_sk)
| join date_dim d on (wss.d_week_seq = d.d_week_seq)
| where
| d_month_seq between 1185 and 1185 + 11
| ) y
| join
| (select
| /*+ MAPJOIN(store, date_dim) */
| s_store_name s_store_name2,
| wss.d_week_seq d_week_seq2,
| s_store_id s_store_id2,
| sun_sales sun_sales2,
| mon_sales mon_sales2,
| tue_sales tue_sales2,
| wed_sales wed_sales2,
| thu_sales thu_sales2,
| fri_sales fri_sales2,
| sat_sales sat_sales2
| from
| (select
| /*+ MAPJOIN(date_dim) */
| d_week_seq,
| ss_store_sk,
| sum(case when(d_day_name = 'Sunday') then ss_sales_price else null end) sun_sales,
| sum(case when(d_day_name = 'Monday') then ss_sales_price else null end) mon_sales,
| sum(case when(d_day_name = 'Tuesday') then ss_sales_price else null end) tue_sales,
| sum(case when(d_day_name = 'Wednesday') then ss_sales_price else null end) wed_sales,
| sum(case when(d_day_name = 'Thursday') then ss_sales_price else null end) thu_sales,
| sum(case when(d_day_name = 'Friday') then ss_sales_price else null end) fri_sales,
| sum(case when(d_day_name = 'Saturday') then ss_sales_price else null end) sat_sales
| from
| store_sales
| join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
| where
| -- ss_date between '1999-10-01' and '2000-09-30'
| ss_sold_date_sk between 2451088 and 2451452
| group by
| d_week_seq,
| ss_store_sk
| ) wss
| join store on (wss.ss_store_sk = store.s_store_sk)
| join date_dim d on (wss.d_week_seq = d.d_week_seq)
| where
| d_month_seq between 1185 + 12 and 1185 + 23
| ) x
| on (y.s_store_id1 = x.s_store_id2)
|where
| d_week_seq1 = d_week_seq2 - 52
|order by
| s_store_name1,
| s_store_id1,
| d_week_seq1
|limit 100
""".stripMargin),
("q68", """
select c_last_name ,c_first_name ,ca_city
,bought_city ,ss_ticket_number ,extended_price
,extended_tax ,list_price
from (select ss_ticket_number
,ss_customer_sk
,ca_city as bought_city
,sum(ss_ext_sales_price) as extended_price
,sum(ss_ext_list_price) as list_price
,sum(ss_ext_tax) as extended_tax
from store_sales
JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
JOIN customer_address ON store_sales.ss_addr_sk = customer_address.ca_address_sk
where
date_dim.d_dom between 1 and 2
and (household_demographics.hd_dep_count = 5 or
household_demographics.hd_vehicle_count= 3)
and date_dim.d_year in (1999,1999+1,1999+2)
and store.s_city in ('Midway','Fairview')
group by ss_ticket_number
,ss_customer_sk
,ss_addr_sk,ca_city) dn
JOIN customer ON dn.ss_customer_sk = customer.c_customer_sk
JOIN customer_address ON customer.c_current_addr_sk = customer_address.ca_address_sk
where
customer_address.ca_city <> dn.bought_city
order by c_last_name
,ss_ticket_number
limit 100"""),
("q73", """
select c_last_name
,c_first_name
,c_salutation
,c_preferred_cust_flag
,ss_ticket_number
,cnt from
(select ss_ticket_number
,ss_customer_sk
,count(*) cnt
from store_sales
JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
where
date_dim.d_dom between 1 and 2
and (household_demographics.hd_buy_potential = '>10000' or
household_demographics.hd_buy_potential = 'unknown')
and household_demographics.hd_vehicle_count > 0
and case when household_demographics.hd_vehicle_count > 0 then
household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1
and date_dim.d_year in (1998,1998+1,1998+2)
and store.s_county in ('Williamson County','Franklin Parish','Bronx County','Orange County')
group by ss_ticket_number,ss_customer_sk) dj
JOIN customer ON dj.ss_customer_sk = customer.c_customer_sk
where
cnt between 5 and 10
order by cnt desc"""),
("q79", """
select
c_last_name,c_first_name,substr(s_city,1,30) as s_city,ss_ticket_number,amt,profit
from
(select ss_ticket_number
,ss_customer_sk
,store.s_city
,sum(ss_coupon_amt) amt
,sum(ss_net_profit) profit
from store_sales
JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
where
(household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0)
and date_dim.d_dow = 1
and date_dim.d_year in (1998,1998+1,1998+2)
and store.s_number_employees between 200 and 295
group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms
JOIN customer on ms.ss_customer_sk = customer.c_customer_sk
order by c_last_name,c_first_name,s_city, profit
limit 100"""),
("qSsMax",
"""
|select
| count(*) as total,
| count(ss_sold_date_sk) as not_null_total,
| count(distinct ss_sold_date_sk) as unique_days,
| max(ss_sold_date_sk) as max_ss_sold_date_sk,
| max(ss_sold_time_sk) as max_ss_sold_time_sk,
| max(ss_item_sk) as max_ss_item_sk,
| max(ss_customer_sk) as max_ss_customer_sk,
| max(ss_cdemo_sk) as max_ss_cdemo_sk,
| max(ss_hdemo_sk) as max_ss_hdemo_sk,
| max(ss_addr_sk) as max_ss_addr_sk,
| max(ss_store_sk) as max_ss_store_sk,
| max(ss_promo_sk) as max_ss_promo_sk
|from store_sales
""".stripMargin)
).map { case (name, sqlText) =>
Query(name, sqlText, description = "original query", executionMode = CollectResults)
}
val interactiveQueries =
Seq("q19", "q42", "q52", "q55", "q63", "q68", "q73", "q98").map(queriesMap)
val reportingQueries = Seq("q3","q7","q27","q43", "q53", "q89").map(queriesMap)
val deepAnalyticQueries = Seq("q34", "q46", "q59", "q65", "q79", "ss_max").map(queriesMap)
val impalaKitQueries = interactiveQueries ++ reportingQueries ++ deepAnalyticQueries
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy