All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.databricks.spark.sql.perf.tpcds.SimpleQueries.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015 Databricks Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.databricks.spark.sql.perf.tpcds

import com.databricks.spark.sql.perf.{ExecutionMode, Benchmark}

trait SimpleQueries extends Benchmark {

  import ExecutionMode._

   val targetedPerfQueries = Seq(
     // Query to measure scan performance.
     ("stores-sales-scan",
       """
         |select * from store_sales where ss_item_sk = 1
       """.stripMargin),
     ("fact-fact-join",
       """
         | select count(*) from store_sales
         | join store_returns
         | on store_sales.ss_item_sk = store_returns.sr_item_sk
         | and store_sales.ss_ticket_number = store_returns.sr_ticket_number
       """.stripMargin)
   ).map { case (name, sqlText) =>
     Query(name = name, sqlText = sqlText, description = "", executionMode = ForeachResults)
   }

   val q7Derived = Seq(
     ("q7-simpleScan",
       """
         |select
         |  ss_quantity,
         |  ss_list_price,
         |  ss_coupon_amt,
         |  ss_coupon_amt,
         |  ss_cdemo_sk,
         |  ss_item_sk,
         |  ss_promo_sk,
         |  ss_sold_date_sk
         |from store_sales
         |where
         |  ss_sold_date_sk between 2450815 and 2451179
       """.stripMargin),

     ("q7-twoMapJoins", """
                          |select
                          |  i_item_id,
                          |  ss_quantity,
                          |  ss_list_price,
                          |  ss_coupon_amt,
                          |  ss_sales_price,
                          |  ss_promo_sk,
                          |  ss_sold_date_sk
                          |from
                          |  store_sales
                          |  join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
                          |  join item on (store_sales.ss_item_sk = item.i_item_sk)
                          |where
                          |  cd_gender = 'F'
                          |  and cd_marital_status = 'W'
                          |  and cd_education_status = 'Primary'
                          |  and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
                        """.stripMargin),

     ("q7-fourMapJoins", """
                           |select
                           |  i_item_id,
                           |  ss_quantity,
                           |  ss_list_price,
                           |  ss_coupon_amt,
                           |  ss_sales_price
                           |from
                           |  store_sales
                           |  join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
                           |  join item on (store_sales.ss_item_sk = item.i_item_sk)
                           |  join promotion on (store_sales.ss_promo_sk = promotion.p_promo_sk)
                           |  join date_dim on (ss_sold_date_sk = d_date_sk)
                           |where
                           |  cd_gender = 'F'
                           |  and cd_marital_status = 'W'
                           |  and cd_education_status = 'Primary'
                           |  and (p_channel_email = 'N'
                           |    or p_channel_event = 'N')
                           |  and d_year = 1998
                           |  -- and ss_date between '1998-01-01' and '1998-12-31'
                           |  and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
                         """.stripMargin),

     ("q7-noOrderBy", """
                        |select
                        |  i_item_id,
                        |  avg(ss_quantity) agg1,
                        |  avg(ss_list_price) agg2,
                        |  avg(ss_coupon_amt) agg3,
                        |  avg(ss_sales_price) agg4
                        |from
                        |  store_sales
                        |  join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
                        |  join item on (store_sales.ss_item_sk = item.i_item_sk)
                        |  join promotion on (store_sales.ss_promo_sk = promotion.p_promo_sk)
                        |  join date_dim on (ss_sold_date_sk = d_date_sk)
                        |where
                        |  cd_gender = 'F'
                        |  and cd_marital_status = 'W'
                        |  and cd_education_status = 'Primary'
                        |  and (p_channel_email = 'N'
                        |    or p_channel_event = 'N')
                        |  and d_year = 1998
                        |  -- and ss_date between '1998-01-01' and '1998-12-31'
                        |  and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
                        |group by
                        |  i_item_id
                      """.stripMargin),

     ("q7", """
              |-- start query 1 in stream 0 using template query7.tpl
              |select
              |  i_item_id,
              |  avg(ss_quantity) agg1,
              |  avg(ss_list_price) agg2,
              |  avg(ss_coupon_amt) agg3,
              |  avg(ss_sales_price) agg4
              |from
              |  store_sales
              |  join customer_demographics on (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
              |  join item on (store_sales.ss_item_sk = item.i_item_sk)
              |  join promotion on (store_sales.ss_promo_sk = promotion.p_promo_sk)
              |  join date_dim on (ss_sold_date_sk = d_date_sk)
              |where
              |  cd_gender = 'F'
              |  and cd_marital_status = 'W'
              |  and cd_education_status = 'Primary'
              |  and (p_channel_email = 'N'
              |    or p_channel_event = 'N')
              |  and d_year = 1998
              |  -- and ss_date between '1998-01-01' and '1998-12-31'
              |  and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
              |group by
              |  i_item_id
              |order by
              |  i_item_id
              |limit 100
              |-- end query 1 in stream 0 using template query7.tpl
            """.stripMargin),
            
      ("store_sales-selfjoin-1",   """
                                   |-- The join condition will yield many matches.
                                   |select
                                   |  t1.ss_quantity,
                                   |  t1.ss_list_price,
                                   |  t1.ss_coupon_amt,
                                   |  t1.ss_cdemo_sk,
                                   |  t1.ss_item_sk,
                                   |  t1.ss_promo_sk,
                                   |  t1.ss_sold_date_sk
                                   |from store_sales t1 join store_sales t2 on t1.ss_item_sk = t2.ss_item_sk
                                   |where
                                   |  t1.ss_sold_date_sk between 2450815 and 2451179
                                   """.stripMargin),

	   
      ("store_sales-selfjoin-2",   """
                                   |-- We ust comound primary key as the join condition. The size of output is comparable with the input table.
                                   |select
                                   |  t1.ss_quantity,
                                   |  t1.ss_list_price,
                                   |  t1.ss_coupon_amt,
                                   |  t1.ss_cdemo_sk,
                                   |  t1.ss_item_sk,
                                   |  t1.ss_promo_sk,
                                   |  t1.ss_sold_date_sk
                                   |from store_sales t1 join store_sales t2 on t1.ss_item_sk = t2.ss_item_sk and t1.ss_ticket_number = t2.ss_ticket_number
                                   |where
                                   |  t1.ss_sold_date_sk between 2450815 and 2451179
                                   """.stripMargin)
   ).map { case (name, sqlText) =>
     Query(name = name, sqlText = sqlText, description = "", executionMode = ForeachResults)
   }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy