All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.prestosql.tests.hive.TestHiveTableStatistics Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.tests.hive;

import com.google.common.collect.ImmutableList;
import io.prestosql.tempto.Requirement;
import io.prestosql.tempto.Requirements;
import io.prestosql.tempto.RequirementsProvider;
import io.prestosql.tempto.Requires;
import io.prestosql.tempto.assertions.QueryAssert.Row;
import io.prestosql.tempto.configuration.Configuration;
import io.prestosql.tempto.fulfillment.table.MutableTableRequirement;
import io.prestosql.tempto.fulfillment.table.hive.HiveTableDefinition;
import io.prestosql.tempto.fulfillment.table.hive.InlineDataSource;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.List;
import java.util.Objects;

import static io.prestosql.tempto.assertions.QueryAssert.Row.row;
import static io.prestosql.tempto.assertions.QueryAssert.anyOf;
import static io.prestosql.tempto.assertions.QueryAssert.assertThat;
import static io.prestosql.tempto.fulfillment.table.MutableTablesState.mutableTablesState;
import static io.prestosql.tempto.fulfillment.table.TableRequirements.mutableTable;
import static io.prestosql.tempto.fulfillment.table.hive.tpch.TpchTableDefinitions.NATION;
import static io.prestosql.tempto.query.QueryExecutor.query;
import static io.prestosql.tests.hive.AllSimpleTypesTableDefinitions.ALL_HIVE_SIMPLE_TYPES_TEXTFILE;
import static io.prestosql.tests.hive.HiveTableDefinitions.NATION_PARTITIONED_BY_BIGINT_REGIONKEY;
import static io.prestosql.tests.hive.HiveTableDefinitions.NATION_PARTITIONED_BY_VARCHAR_REGIONKEY;
import static io.prestosql.tests.utils.QueryExecutors.onHive;
import static java.lang.String.format;

public class TestHiveTableStatistics
        extends HiveProductTest
{
    private static class UnpartitionedNationTable
            implements RequirementsProvider
    {
        @Override
        public Requirement getRequirements(Configuration configuration)
        {
            return mutableTable(
                    HiveTableDefinition.from(NATION)
                            .injectStats(false)
                            .build());
        }
    }

    private static class NationPartitionedByBigintTable
            implements RequirementsProvider
    {
        @Override
        public Requirement getRequirements(Configuration configuration)
        {
            return mutableTable(
                    HiveTableDefinition.from(NATION_PARTITIONED_BY_BIGINT_REGIONKEY)
                            .injectStats(false)
                            .build());
        }
    }

    private static class NationPartitionedByVarcharTable
            implements RequirementsProvider
    {
        @Override
        public Requirement getRequirements(Configuration configuration)
        {
            return mutableTable(
                    HiveTableDefinition.from(NATION_PARTITIONED_BY_VARCHAR_REGIONKEY)
                            .injectStats(false)
                            .build());
        }
    }

    private static final String ALL_TYPES_TABLE_NAME = "all_types";
    private static final String EMPTY_ALL_TYPES_TABLE_NAME = "empty_all_types";
    private static final String ALL_TYPES_ALL_NULL_TABLE_NAME = "all_types_all_null";

    private static final HiveTableDefinition ALL_TYPES_TABLE = HiveTableDefinition.like(ALL_HIVE_SIMPLE_TYPES_TEXTFILE)
            .setDataSource(InlineDataSource.createStringDataSource(
                    "all_analyzable_types",
                    "121|32761|2147483641|9223372036854775801|123.341|234.561|344.671|345.671|2015-05-10 12:15:31.123456|2015-05-09|ela ma kota|ela ma kot|ela ma    |false|cGllcyBiaW5hcm55|\n" +
                            "127|32767|2147483647|9223372036854775807|123.345|235.567|345.678|345.678|2015-05-10 12:15:35.123456|2015-06-10|ala ma kota|ala ma kot|ala ma    |true|a290IGJpbmFybnk=|\n"))
            .build();

    private static final HiveTableDefinition ALL_TYPES_ALL_NULL_TABLE = HiveTableDefinition.like(ALL_HIVE_SIMPLE_TYPES_TEXTFILE)
            .setDataSource(InlineDataSource.createStringDataSource(
                    "all_analyzable_types_all_null",
                    "\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\n"))
            .build();

    private static final class AllTypesTable
            implements RequirementsProvider
    {
        @Override
        public Requirement getRequirements(Configuration configuration)
        {
            return Requirements.compose(
                    mutableTable(ALL_TYPES_TABLE, ALL_TYPES_TABLE_NAME, MutableTableRequirement.State.LOADED),
                    mutableTable(ALL_TYPES_ALL_NULL_TABLE, ALL_TYPES_ALL_NULL_TABLE_NAME, MutableTableRequirement.State.LOADED),
                    mutableTable(ALL_TYPES_TABLE, EMPTY_ALL_TYPES_TABLE_NAME, MutableTableRequirement.State.CREATED));
        }
    }

    private List getAllTypesTableStatistics()
    {
        return ImmutableList.of(
                row("c_tinyint", null, 2.0, 0.0, null, "121", "127"),
                row("c_smallint", null, 2.0, 0.0, null, "32761", "32767"),
                row("c_int", null, 2.0, 0.0, null, "2147483641", "2147483647"),
                row("c_bigint", null, 2.0, 0.0, null, "9223372036854775807", "9223372036854775807"),
                row("c_float", null, 2.0, 0.0, null, "123.341", "123.345"),
                row("c_double", null, 2.0, 0.0, null, "234.561", "235.567"),
                row("c_decimal", null, 2.0, 0.0, null, "345.0", "346.0"),
                row("c_decimal_w_params", null, 2.0, 0.0, null, "345.671", "345.678"),
                row("c_timestamp", null, 2.0, 0.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_date", null, null, null, null, null, null)
                        : row("c_date", null, 2.0, 0.0, null, "2015-05-09", "2015-06-10"),
                row("c_string", 22.0, 2.0, 0.0, null, null, null),
                row("c_varchar", 20.0, 2.0, 0.0, null, null, null),
                row("c_char", 12.0, 2.0, 0.0, null, null, null),
                row("c_boolean", null, 2.0, 0.0, null, null, null),
                row("c_binary", 23.0, null, 0.0, null, null, null),
                row(null, null, null, null, 2.0, null, null));
    }

    private List getAllTypesAllNullTableStatistics()
    {
        return ImmutableList.of(
                row("c_tinyint", null, 0.0, 1.0, null, null, null),
                row("c_smallint", null, 0.0, 1.0, null, null, null),
                row("c_int", null, 0.0, 1.0, null, null, null),
                row("c_bigint", null, 0.0, 1.0, null, null, null),
                row("c_float", null, 0.0, 1.0, null, null, null),
                row("c_double", null, 0.0, 1.0, null, null, null),
                row("c_decimal", null, 0.0, 1.0, null, null, null),
                row("c_decimal_w_params", null, 0.0, 1.0, null, null, null),
                row("c_timestamp", null, 0.0, 1.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_date", null, null, null, null, null, null)
                        : row("c_date", null, 0.0, 1.0, null, null, null),
                row("c_string", 0.0, 0.0, 1.0, null, null, null),
                row("c_varchar", 0.0, 0.0, 1.0, null, null, null),
                row("c_char", 0.0, 0.0, 1.0, null, null, null),
                row("c_boolean", null, 0.0, 1.0, null, null, null),
                row("c_binary", 0.0, null, 1.0, null, null, null),
                row(null, null, null, null, 1.0, null, null));
    }

    private List getAllTypesEmptyTableStatistics()
    {
        return ImmutableList.of(
                row("c_tinyint", null, 0.0, 0.0, null, null, null),
                row("c_smallint", null, 0.0, 0.0, null, null, null),
                row("c_int", null, 0.0, 0.0, null, null, null),
                row("c_bigint", null, 0.0, 0.0, null, null, null),
                row("c_float", null, 0.0, 0.0, null, null, null),
                row("c_double", null, 0.0, 0.0, null, null, null),
                row("c_decimal", null, 0.0, 0.0, null, null, null),
                row("c_decimal_w_params", null, 0.0, 0.0, null, null, null),
                row("c_timestamp", null, 0.0, 0.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_date", null, null, null, null, null, null)
                        : row("c_date", null, 0.0, 0.0, null, null, null),
                row("c_string", 0.0, 0.0, 0.0, null, null, null),
                row("c_varchar", 0.0, 0.0, 0.0, null, null, null),
                row("c_char", 0.0, 0.0, 0.0, null, null, null),
                row("c_boolean", null, 0.0, 0.0, null, null, null),
                row("c_binary", 0.0, null, 0.0, null, null, null),
                row(null, null, null, null, 0.0, null, null));
    }

    @Test
    @Requires(UnpartitionedNationTable.class)
    public void testStatisticsForUnpartitionedTable()
    {
        String tableNameInDatabase = mutableTablesState().get(NATION.getName()).getNameInDatabase();

        String showStatsWholeTable = "SHOW STATS FOR " + tableNameInDatabase;

        // table not analyzed

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("n_nationkey", null, null, null, null, null, null),
                row("n_name", null, null, null, null, null, null),
                row("n_regionkey", null, null, null, null, null, null),
                row("n_comment", null, null, null, null, null, null),
                row(null, null, null, null, anyOf(null, 0.0), null, null)); // anyOf because of different behaviour on HDP (hive 1.2) and CDH (hive 1.1)

        // basic analysis

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("n_nationkey", null, null, null, null, null, null),
                row("n_name", null, null, null, null, null, null),
                row("n_regionkey", null, null, null, null, null, null),
                row("n_comment", null, null, null, null, null, null),
                row(null, null, null, null, 25.0, null, null));

        // column analysis

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS FOR COLUMNS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("n_nationkey", null, anyOf(19., 25.), 0.0, null, "0", "24"),
                row("n_name", 177.0, anyOf(24., 25.), 0.0, null, null, null),
                row("n_regionkey", null, 5.0, 0.0, null, "0", "4"),
                row("n_comment", 1857.0, 25.0, 0.0, null, null, null),
                row(null, null, null, null, 25.0, null, null));
    }

    @Test
    @Requires(NationPartitionedByBigintTable.class)
    public void testStatisticsForTablePartitionedByBigint()
    {
        String tableNameInDatabase = mutableTablesState().get(NATION_PARTITIONED_BY_BIGINT_REGIONKEY.getName()).getNameInDatabase();

        String showStatsWholeTable = "SHOW STATS FOR " + tableNameInDatabase;
        String showStatsPartitionOne = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 1)";
        String showStatsPartitionTwo = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 2)";

        // table not analyzed

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        // basic analysis for single partition

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " PARTITION (p_regionkey = \"1\") COMPUTE STATISTICS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, 3.0, 0.0, null, "1", "3"),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "1", "1"),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        // basic analysis for all partitions

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " PARTITION (p_regionkey) COMPUTE STATISTICS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, 3.0, 0.0, null, "1", "3"),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "1", "1"),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "2", "2"),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        // column analysis for single partition

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " PARTITION (p_regionkey = \"1\") COMPUTE STATISTICS FOR COLUMNS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 114.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 3.0, 0.0, null, "1", "3"),
                row("p_comment", 1497.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 38.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "1", "1"),
                row("p_comment", 499.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "2", "2"),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        // column analysis for all partitions

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " PARTITION (p_regionkey) COMPUTE STATISTICS FOR COLUMNS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 109.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 3.0, 0.0, null, "1", "3"),
                row("p_comment", 1197.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 38.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "1", "1"),
                row("p_comment", 499.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, anyOf(4., 5.), 0.0, null, "8", "21"),
                row("p_name", 31.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "2", "2"),
                row("p_comment", 351.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));
    }

    @Test
    @Requires(NationPartitionedByVarcharTable.class)
    public void testStatisticsForTablePartitionedByVarchar()
    {
        String tableNameInDatabase = mutableTablesState().get(NATION_PARTITIONED_BY_VARCHAR_REGIONKEY.getName()).getNameInDatabase();

        String showStatsWholeTable = "SHOW STATS FOR " + tableNameInDatabase;
        String showStatsPartitionOne = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 'AMERICA')";
        String showStatsPartitionTwo = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 'ASIA')";

        // table not analyzed

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        // basic analysis for single partition

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " PARTITION (p_regionkey = \"AMERICA\") COMPUTE STATISTICS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", 85.0, 3.0, 0.0, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", 35.0, 1.0, 0.0, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        // basic analysis for all partitions

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " PARTITION (p_regionkey) COMPUTE STATISTICS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", 85.0, 3.0, 0.0, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", 35.0, 1.0, 0.0, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", 20.0, 1.0, 0.0, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        // column analysis for single partition

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " PARTITION (p_regionkey = \"AMERICA\") COMPUTE STATISTICS FOR COLUMNS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 114.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 85.0, 3.0, 0.0, null, null, null),
                row("p_comment", 1497.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 38.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 35.0, 1.0, 0.0, null, null, null),
                row("p_comment", 499.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", 20.0, 1.0, 0.0, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        // column analysis for all partitions

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " PARTITION (p_regionkey) COMPUTE STATISTICS FOR COLUMNS");

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 109.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 85.0, 3.0, 0.0, null, null, null),
                row("p_comment", 1197.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 38.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 35.0, 1.0, 0.0, null, null, null),
                row("p_comment", 499.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, anyOf(4., 5.), 0.0, null, "8", "21"),
                row("p_name", 31.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 20.0, 1.0, 0.0, null, null, null),
                row("p_comment", 351.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));
    }

    // This covers also stats calculation for unpartitioned table
    @Test
    @Requires(AllTypesTable.class)
    public void testStatisticsForAllDataTypes()
    {
        String tableNameInDatabase = mutableTablesState().get(ALL_TYPES_TABLE_NAME).getNameInDatabase();

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS");

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, null, null, null, null, null),
                row("c_smallint", null, null, null, null, null, null),
                row("c_int", null, null, null, null, null, null),
                row("c_bigint", null, null, null, null, null, null),
                row("c_float", null, null, null, null, null, null),
                row("c_double", null, null, null, null, null, null),
                row("c_decimal", null, null, null, null, null, null),
                row("c_decimal_w_params", null, null, null, null, null, null),
                row("c_timestamp", null, null, null, null, null, null),
                row("c_date", null, null, null, null, null, null),
                row("c_string", null, null, null, null, null, null),
                row("c_varchar", null, null, null, null, null, null),
                row("c_char", null, null, null, null, null, null),
                row("c_boolean", null, null, null, null, null, null),
                row("c_binary", null, null, null, null, null, null),
                row(null, null, null, null, 2.0, null, null));

        if (isHiveVersionBefore12()) {
            onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS FOR COLUMNS " +
                    "c_tinyint, c_smallint, c_int, c_bigint, c_float, c_double, c_decimal, c_decimal_w_params, c_timestamp, " +
                    "c_string, c_varchar, c_char, c_bigint, c_binary");
        }
        else {
            onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS FOR COLUMNS");
        }

        // SHOW STATS FORMAT: column_name, data_size, distinct_values_count, nulls_fraction, row_count
        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, 2.0, 0.0, null, "121", "127"),
                row("c_smallint", null, 2.0, 0.0, null, "32761", "32767"),
                row("c_int", null, 2.0, 0.0, null, "2147483641", "2147483647"),
                row("c_bigint", null, 2.0, 0.0, null, "9223372036854775807", "9223372036854775807"),
                row("c_float", null, 2.0, 0.0, null, "123.341", "123.345"),
                row("c_double", null, 2.0, 0.0, null, "234.561", "235.567"),
                row("c_decimal", null, 2.0, 0.0, null, "345.0", "346.0"),
                row("c_decimal_w_params", null, 2.0, 0.0, null, "345.671", "345.678"),
                row("c_timestamp", null, 2.0, 0.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_date", null, null, null, null, null, null)
                        : row("c_date", null, 2.0, 0.0, null, "2015-05-09", "2015-06-10"),
                row("c_string", 22.0, 2.0, 0.0, null, null, null),
                row("c_varchar", 20.0, 2.0, 0.0, null, null, null),
                row("c_char", 12.0, 2.0, 0.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_boolean", null, null, null, null, null, null)
                        : row("c_boolean", null, 2.0, 0.0, null, null, null),
                row("c_binary", 23.0, null, 0.0, null, null, null),
                row(null, null, null, null, 2.0, null, null));
    }

    @Test
    @Requires(AllTypesTable.class)
    public void testStatisticsForAllDataTypesNoData()
    {
        String tableNameInDatabase = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase();

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS");

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, null, null, null, null, null),
                row("c_smallint", null, null, null, null, null, null),
                row("c_int", null, null, null, null, null, null),
                row("c_bigint", null, null, null, null, null, null),
                row("c_float", null, null, null, null, null, null),
                row("c_double", null, null, null, null, null, null),
                row("c_decimal", null, null, null, null, null, null),
                row("c_decimal_w_params", null, null, null, null, null, null),
                row("c_timestamp", null, null, null, null, null, null),
                row("c_date", null, null, null, null, null, null),
                row("c_string", null, null, null, null, null, null),
                row("c_varchar", null, null, null, null, null, null),
                row("c_char", null, null, null, null, null, null),
                row("c_boolean", null, null, null, null, null, null),
                row("c_binary", null, null, null, null, null, null),
                row(null, null, null, null, 0.0, null, null));

        if (isHiveVersionBefore12()) {
            onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS FOR COLUMNS " +
                    "c_tinyint, c_smallint, c_int, c_bigint, c_float, c_double, c_decimal, c_decimal_w_params, c_timestamp, " +
                    "c_string, c_varchar, c_char, c_bigint, c_binary");
        }
        else {
            onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS FOR COLUMNS");
        }

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, 0.0, 0.0, null, null, null),
                row("c_smallint", null, 0.0, 0.0, null, null, null),
                row("c_int", null, 0.0, 0.0, null, null, null),
                row("c_bigint", null, 0.0, 0.0, null, null, null),
                row("c_float", null, 0.0, 0.0, null, null, null),
                row("c_double", null, 0.0, 0.0, null, null, null),
                row("c_decimal", null, 0.0, 0.0, null, null, null),
                row("c_decimal_w_params", null, 0.0, 0.0, null, null, null),
                row("c_timestamp", null, 0.0, 0.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_date", null, null, null, null, null, null)
                        : row("c_date", null, 0.0, 0.0, null, null, null),
                row("c_string", 0.0, 0.0, 0.0, null, null, null),
                row("c_varchar", 0.0, 0.0, 0.0, null, null, null),
                row("c_char", 0.0, 0.0, 0.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_boolean", null, null, null, null, null, null)
                        : row("c_boolean", null, 0.0, 0.0, null, null, null),
                row("c_binary", 0.0, null, 0.0, null, null, null),
                row(null, null, null, null, 0.0, null, null));
    }

    @Test
    @Requires(AllTypesTable.class)
    public void testStatisticsForAllDataTypesOnlyNulls()
    {
        String tableNameInDatabase = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase();
        onHive().executeQuery("INSERT INTO TABLE " + tableNameInDatabase + " VALUES(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null)");

        onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS");

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, null, null, null, null, null),
                row("c_smallint", null, null, null, null, null, null),
                row("c_int", null, null, null, null, null, null),
                row("c_bigint", null, null, null, null, null, null),
                row("c_float", null, null, null, null, null, null),
                row("c_double", null, null, null, null, null, null),
                row("c_decimal", null, null, null, null, null, null),
                row("c_decimal_w_params", null, null, null, null, null, null),
                row("c_timestamp", null, null, null, null, null, null),
                row("c_date", null, null, null, null, null, null),
                row("c_string", null, null, null, null, null, null),
                row("c_varchar", null, null, null, null, null, null),
                row("c_char", null, null, null, null, null, null),
                row("c_boolean", null, null, null, null, null, null),
                row("c_binary", null, null, null, null, null, null),
                row(null, null, null, null, 1.0, null, null));

        if (isHiveVersionBefore12()) {
            onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS FOR COLUMNS " +
                    "c_tinyint, c_smallint, c_int, c_bigint, c_float, c_double, c_decimal, c_decimal_w_params, c_timestamp, " +
                    "c_string, c_varchar, c_char, c_bigint, c_binary");
        }
        else {
            onHive().executeQuery("ANALYZE TABLE " + tableNameInDatabase + " COMPUTE STATISTICS FOR COLUMNS");
        }

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, 0.0, 1.0, null, null, null),
                row("c_smallint", null, 0.0, 1.0, null, null, null),
                row("c_int", null, 0.0, 1.0, null, null, null),
                row("c_bigint", null, 0.0, 1.0, null, null, null),
                row("c_float", null, 0.0, 1.0, null, null, null),
                row("c_double", null, 0.0, 1.0, null, null, null),
                row("c_decimal", null, 0.0, 1.0, null, null, null),
                row("c_decimal_w_params", null, 0.0, 1.0, null, null, null),
                row("c_timestamp", null, 0.0, 1.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_date", null, null, null, null, null, null)
                        : row("c_date", null, 0.0, 1.0, null, null, null),
                row("c_string", 0.0, 0.0, 1.0, null, null, null),
                row("c_varchar", 0.0, 0.0, 1.0, null, null, null),
                row("c_char", 0.0, 0.0, 1.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_boolean", null, null, null, null, null, null)
                        : row("c_boolean", null, 0.0, 1.0, null, null, null),
                row("c_binary", 0.0, null, 1.0, null, null, null),
                row(null, null, null, null, 1.0, null, null));
    }

    @Test
    @Requires(UnpartitionedNationTable.class)
    public void testStatisticsForSkewedTable()
    {
        String tableName = "test_hive_skewed_table_statistics";
        onHive().executeQuery("DROP TABLE IF EXISTS " + tableName);
        onHive().executeQuery("CREATE TABLE " + tableName + " (c_string STRING, c_int INT) SKEWED BY (c_string) ON ('c1')");
        onHive().executeQuery("INSERT INTO TABLE " + tableName + " VALUES ('c1', 1), ('c1', 2)");

        assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(
                row("c_string", null, null, null, null, null, null),
                row("c_int", null, null, null, null, null, null),
                row(null, null, null, null, 2.0, null, null));

        onHive().executeQuery("ANALYZE TABLE " + tableName + " COMPUTE STATISTICS");

        assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(
                row("c_string", null, null, null, null, null, null),
                row("c_int", null, null, null, null, null, null),
                row(null, null, null, null, 2.0, null, null));

        onHive().executeQuery("ANALYZE TABLE " + tableName + " COMPUTE STATISTICS FOR COLUMNS");
        assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(
                row("c_string", 4.0, 1.0, 0.0, null, null, null),
                row("c_int", null, 2.0, 0.0, null, "1", "2"),
                row(null, null, null, null, 2.0, null, null));
    }

    @Test
    @Requires(UnpartitionedNationTable.class)
    public void testAnalyzesForSkewedTable()
    {
        String tableName = "test_analyze_skewed_table";
        onHive().executeQuery("DROP TABLE IF EXISTS " + tableName);
        onHive().executeQuery("CREATE TABLE " + tableName + " (c_string STRING, c_int INT) SKEWED BY (c_string) ON ('c1')");
        onHive().executeQuery("INSERT INTO TABLE " + tableName + " VALUES ('c1', 1), ('c1', 2)");

        assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(
                row("c_string", null, null, null, null, null, null),
                row("c_int", null, null, null, null, null, null),
                row(null, null, null, null, 2.0, null, null));

        assertThat(query("ANALYZE " + tableName)).containsExactly(row(2));
        assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(
                row("c_string", 4.0, 1.0, 0.0, null, null, null),
                row("c_int", null, 2.0, 0.0, null, "1", "2"),
                row(null, null, null, null, 2.0, null, null));
    }

    @Test
    @Requires(UnpartitionedNationTable.class)
    public void testAnalyzeForUnpartitionedTable()
    {
        String tableNameInDatabase = mutableTablesState().get(NATION.getName()).getNameInDatabase();

        String showStatsWholeTable = "SHOW STATS FOR " + tableNameInDatabase;

        // table not analyzed
        assertThat(query(showStatsWholeTable)).containsOnly(
                row("n_nationkey", null, null, null, null, null, null),
                row("n_name", null, null, null, null, null, null),
                row("n_regionkey", null, null, null, null, null, null),
                row("n_comment", null, null, null, null, null, null),
                row(null, null, null, null, anyOf(null, 0.0), null, null)); // anyOf because of different behaviour on HDP (hive 1.2) and CDH (hive 1.1)

        assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(25));

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("n_nationkey", null, 25.0, 0.0, null, "0", "24"),
                row("n_name", 177.0, 25.0, 0.0, null, null, null),
                row("n_regionkey", null, 5.0, 0.0, null, "0", "4"),
                row("n_comment", 1857.0, 25.0, 0.0, null, null, null),
                row(null, null, null, null, 25.0, null, null));
    }

    @Test
    @Requires(NationPartitionedByBigintTable.class)
    public void testAnalyzeForTablePartitionedByBigint()
    {
        String tableNameInDatabase = mutableTablesState().get(NATION_PARTITIONED_BY_BIGINT_REGIONKEY.getName()).getNameInDatabase();

        String showStatsWholeTable = "SHOW STATS FOR " + tableNameInDatabase;
        String showStatsPartitionOne = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 1)";
        String showStatsPartitionTwo = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 2)";

        // table not analyzed

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        // analyze for single partition

        assertThat(query("ANALYZE " + tableNameInDatabase + " WITH (partitions = ARRAY[ARRAY['1']])")).containsExactly(row(5));

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 114.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 3.0, 0.0, null, "1", "3"),
                row("p_comment", 1497.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 38.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "1", "1"),
                row("p_comment", 499.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        // analyze for all partitions

        assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(15));

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 109.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 3.0, 0.0, null, "1", "3"),
                row("p_comment", 1197.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 38.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "1", "1"),
                row("p_comment", 499.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "8", "21"),
                row("p_name", 31.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", null, 1.0, 0.0, null, "2", "2"),
                row("p_comment", 351.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));
    }

    @Test
    @Requires(NationPartitionedByVarcharTable.class)
    public void testAnalyzeForTablePartitionedByVarchar()
    {
        String tableNameInDatabase = mutableTablesState().get(NATION_PARTITIONED_BY_VARCHAR_REGIONKEY.getName()).getNameInDatabase();

        String showStatsWholeTable = "SHOW STATS FOR " + tableNameInDatabase;
        String showStatsPartitionOne = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 'AMERICA')";
        String showStatsPartitionTwo = "SHOW STATS FOR (SELECT * FROM " + tableNameInDatabase + " WHERE p_regionkey = 'ASIA')";

        // table not analyzed

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        // analyze for single partition

        assertThat(query("ANALYZE " + tableNameInDatabase + " WITH (partitions = ARRAY[ARRAY['AMERICA']])")).containsExactly(row(5));

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 114.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 85.0, 3.0, 0.0, null, null, null),
                row("p_comment", 1497.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 38.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 35.0, 1.0, 0.0, null, null, null),
                row("p_comment", 499.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, null, null, null, null, null),
                row("p_name", null, null, null, null, null, null),
                row("p_regionkey", null, null, null, null, null, null),
                row("p_comment", null, null, null, null, null, null),
                row(null, null, null, null, null, null, null));

        // column analysis for all partitions

        assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(15));

        assertThat(query(showStatsWholeTable)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 109.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 85.0, 3.0, 0.0, null, null, null),
                row("p_comment", 1197.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 15.0, null, null));

        assertThat(query(showStatsPartitionOne)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "1", "24"),
                row("p_name", 38.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 35.0, 1.0, 0.0, null, null, null),
                row("p_comment", 499.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));

        assertThat(query(showStatsPartitionTwo)).containsOnly(
                row("p_nationkey", null, 5.0, 0.0, null, "8", "21"),
                row("p_name", 31.0, 5.0, 0.0, null, null, null),
                row("p_regionkey", 20.0, 1.0, 0.0, null, null, null),
                row("p_comment", 351.0, 5.0, 0.0, null, null, null),
                row(null, null, null, null, 5.0, null, null));
    }

    // This covers also stats calculation for unpartitioned table
    @Test
    @Requires(AllTypesTable.class)
    public void testAnalyzeForAllDataTypes()
    {
        String tableNameInDatabase = mutableTablesState().get(ALL_TYPES_TABLE_NAME).getNameInDatabase();

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, null, null, null, null, null),
                row("c_smallint", null, null, null, null, null, null),
                row("c_int", null, null, null, null, null, null),
                row("c_bigint", null, null, null, null, null, null),
                row("c_float", null, null, null, null, null, null),
                row("c_double", null, null, null, null, null, null),
                row("c_decimal", null, null, null, null, null, null),
                row("c_decimal_w_params", null, null, null, null, null, null),
                row("c_timestamp", null, null, null, null, null, null),
                row("c_date", null, null, null, null, null, null),
                row("c_string", null, null, null, null, null, null),
                row("c_varchar", null, null, null, null, null, null),
                row("c_char", null, null, null, null, null, null),
                row("c_boolean", null, null, null, null, null, null),
                row("c_binary", null, null, null, null, null, null),
                row(null, null, null, null, anyOf(null, 0.0), null, null)); // anyOf because of different behaviour on HDP (hive 1.2) and CDH (hive 1.1)

        assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(2));

        // SHOW STATS FORMAT: column_name, data_size, distinct_values_count, nulls_fraction, row_count
        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, 2.0, 0.0, null, "121", "127"),
                row("c_smallint", null, 2.0, 0.0, null, "32761", "32767"),
                row("c_int", null, 2.0, 0.0, null, "2147483641", "2147483647"),
                row("c_bigint", null, 2.0, 0.0, null, "9223372036854775807", "9223372036854775807"),
                row("c_float", null, 2.0, 0.0, null, "123.341", "123.345"),
                row("c_double", null, 2.0, 0.0, null, "234.561", "235.567"),
                row("c_decimal", null, 2.0, 0.0, null, "345.0", "346.0"),
                row("c_decimal_w_params", null, 2.0, 0.0, null, "345.671", "345.678"),
                row("c_timestamp", null, 2.0, 0.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_date", null, null, null, null, null, null)
                        : row("c_date", null, 2.0, 0.0, null, "2015-05-09", "2015-06-10"),
                row("c_string", 22.0, 2.0, 0.0, null, null, null),
                row("c_varchar", 20.0, 2.0, 0.0, null, null, null),
                row("c_char", 12.0, 2.0, 0.0, null, null, null),
                row("c_boolean", null, 2.0, 0.0, null, null, null),
                row("c_binary", 23.0, null, 0.0, null, null, null),
                row(null, null, null, null, 2.0, null, null));
    }

    @Test
    @Requires(AllTypesTable.class)
    public void testAnalyzeForAllDataTypesNoData()
    {
        String tableNameInDatabase = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase();

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, null, null, null, null, null),
                row("c_smallint", null, null, null, null, null, null),
                row("c_int", null, null, null, null, null, null),
                row("c_bigint", null, null, null, null, null, null),
                row("c_float", null, null, null, null, null, null),
                row("c_double", null, null, null, null, null, null),
                row("c_decimal", null, null, null, null, null, null),
                row("c_decimal_w_params", null, null, null, null, null, null),
                row("c_timestamp", null, null, null, null, null, null),
                row("c_date", null, null, null, null, null, null),
                row("c_string", null, null, null, null, null, null),
                row("c_varchar", null, null, null, null, null, null),
                row("c_char", null, null, null, null, null, null),
                row("c_boolean", null, null, null, null, null, null),
                row("c_binary", null, null, null, null, null, null),
                row(null, null, null, null, anyOf(null, 0.0), null, null)); // anyOf because of different behaviour on HDP (hive 1.2) and CDH (hive 1.1)

        assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(0));

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, 0.0, 0.0, null, null, null),
                row("c_smallint", null, 0.0, 0.0, null, null, null),
                row("c_int", null, 0.0, 0.0, null, null, null),
                row("c_bigint", null, 0.0, 0.0, null, null, null),
                row("c_float", null, 0.0, 0.0, null, null, null),
                row("c_double", null, 0.0, 0.0, null, null, null),
                row("c_decimal", null, 0.0, 0.0, null, null, null),
                row("c_decimal_w_params", null, 0.0, 0.0, null, null, null),
                row("c_timestamp", null, 0.0, 0.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_date", null, null, null, null, null, null)
                        : row("c_date", null, 0.0, 0.0, null, null, null),
                row("c_string", 0.0, 0.0, 0.0, null, null, null),
                row("c_varchar", 0.0, 0.0, 0.0, null, null, null),
                row("c_char", 0.0, 0.0, 0.0, null, null, null),
                row("c_boolean", null, 0.0, 0.0, null, null, null),
                row("c_binary", 0.0, null, 0.0, null, null, null),
                row(null, null, null, null, 0.0, null, null));
    }

    @Test
    @Requires(AllTypesTable.class)
    public void testAnalyzeForAllDataTypesOnlyNulls()
    {
        String tableNameInDatabase = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase();

        // insert from hive to prevent Presto collecting statistics on insert
        onHive().executeQuery("INSERT INTO TABLE " + tableNameInDatabase + " VALUES(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null)");

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, null, null, null, null, null),
                row("c_smallint", null, null, null, null, null, null),
                row("c_int", null, null, null, null, null, null),
                row("c_bigint", null, null, null, null, null, null),
                row("c_float", null, null, null, null, null, null),
                row("c_double", null, null, null, null, null, null),
                row("c_decimal", null, null, null, null, null, null),
                row("c_decimal_w_params", null, null, null, null, null, null),
                row("c_timestamp", null, null, null, null, null, null),
                row("c_date", null, null, null, null, null, null),
                row("c_string", null, null, null, null, null, null),
                row("c_varchar", null, null, null, null, null, null),
                row("c_char", null, null, null, null, null, null),
                row("c_boolean", null, null, null, null, null, null),
                row("c_binary", null, null, null, null, null, null),
                row(null, null, null, null, 1.0, null, null));

        assertThat(query("ANALYZE " + tableNameInDatabase)).containsExactly(row(1));

        assertThat(query("SHOW STATS FOR " + tableNameInDatabase)).containsOnly(
                row("c_tinyint", null, 0.0, 1.0, null, null, null),
                row("c_smallint", null, 0.0, 1.0, null, null, null),
                row("c_int", null, 0.0, 1.0, null, null, null),
                row("c_bigint", null, 0.0, 1.0, null, null, null),
                row("c_float", null, 0.0, 1.0, null, null, null),
                row("c_double", null, 0.0, 1.0, null, null, null),
                row("c_decimal", null, 0.0, 1.0, null, null, null),
                row("c_decimal_w_params", null, 0.0, 1.0, null, null, null),
                row("c_timestamp", null, 0.0, 1.0, null, null, null),
                isHiveVersionBefore12()
                        ? row("c_date", null, null, null, null, null, null)
                        : row("c_date", null, 0.0, 1.0, null, null, null),
                row("c_string", 0.0, 0.0, 1.0, null, null, null),
                row("c_varchar", 0.0, 0.0, 1.0, null, null, null),
                row("c_char", 0.0, 0.0, 1.0, null, null, null),
                row("c_boolean", null, 0.0, 1.0, null, null, null),
                row("c_binary", 0.0, null, 1.0, null, null, null),
                row(null, null, null, null, 1.0, null, null));
    }

    @Test
    @Requires(AllTypesTable.class)
    public void testComputeTableStatisticsOnCreateTable()
    {
        String allTypesTable = mutableTablesState().get(ALL_TYPES_TABLE_NAME).getNameInDatabase();
        String emptyAllTypesTable = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase();
        String allTypesAllNullTable = mutableTablesState().get(ALL_TYPES_ALL_NULL_TABLE_NAME).getNameInDatabase();

        assertComputeTableStatisticsOnCreateTable(allTypesTable, getAllTypesTableStatistics());
        assertComputeTableStatisticsOnCreateTable(emptyAllTypesTable, getAllTypesEmptyTableStatistics());
        assertComputeTableStatisticsOnCreateTable(allTypesAllNullTable, getAllTypesAllNullTableStatistics());
    }

    @Test
    @Requires(AllTypesTable.class)
    public void testComputeTableStatisticsOnInsert()
    {
        String allTypesTable = mutableTablesState().get(ALL_TYPES_TABLE_NAME).getNameInDatabase();
        String emptyAllTypesTable = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase();
        String allTypesAllNullTable = mutableTablesState().get(ALL_TYPES_ALL_NULL_TABLE_NAME).getNameInDatabase();

        assertComputeTableStatisticsOnInsert(allTypesTable, getAllTypesTableStatistics());
        assertComputeTableStatisticsOnInsert(emptyAllTypesTable, getAllTypesEmptyTableStatistics());
        assertComputeTableStatisticsOnInsert(allTypesAllNullTable, getAllTypesAllNullTableStatistics());

        String tableName = "test_update_table_statistics";
        query(format("DROP TABLE IF EXISTS %s", tableName));
        try {
            query(format("CREATE TABLE %s AS SELECT * FROM %s WITH NO DATA", tableName, allTypesTable));
            query(format("INSERT INTO %s SELECT * FROM %s", tableName, allTypesTable));
            query(format("INSERT INTO %s SELECT * FROM %s", tableName, allTypesAllNullTable));
            query(format("INSERT INTO %s SELECT * FROM %s", tableName, allTypesAllNullTable));
            assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(
                    row("c_tinyint", null, 2.0, 0.5, null, "121", "127"),
                    row("c_smallint", null, 2.0, 0.5, null, "32761", "32767"),
                    row("c_int", null, 2.0, 0.5, null, "2147483641", "2147483647"),
                    row("c_bigint", null, 2.0, 0.5, null, "9223372036854775807", "9223372036854775807"),
                    row("c_float", null, 2.0, 0.5, null, "123.341", "123.345"),
                    row("c_double", null, 2.0, 0.5, null, "234.561", "235.567"),
                    row("c_decimal", null, 2.0, 0.5, null, "345.0", "346.0"),
                    row("c_decimal_w_params", null, 2.0, 0.5, null, "345.671", "345.678"),
                    row("c_timestamp", null, 2.0, 0.5, null, null, null),
                    isHiveVersionBefore12()
                            ? row("c_date", null, null, null, null, null, null)
                            : row("c_date", null, 2.0, 0.5, null, "2015-05-09", "2015-06-10"),
                    row("c_string", 22.0, 2.0, 0.5, null, null, null),
                    row("c_varchar", 20.0, 2.0, 0.5, null, null, null),
                    row("c_char", 12.0, 2.0, 0.5, null, null, null),
                    row("c_boolean", null, 2.0, 0.5, null, null, null),
                    row("c_binary", 23.0, null, 0.5, null, null, null),
                    row(null, null, null, null, 4.0, null, null));

            query(format("INSERT INTO %s VALUES( " +
                    "TINYINT '120', " +
                    "SMALLINT '32760', " +
                    "INTEGER '2147483640', " +
                    "BIGINT '9223372036854775800', " +
                    "REAL '123.340', " +
                    "DOUBLE '234.560', " +
                    "CAST(343.0 AS DECIMAL(10, 0)), " +
                    "CAST(345.670 AS DECIMAL(10, 5)), " +
                    "TIMESTAMP '2015-05-10 12:15:30', " +
                    "DATE '2015-05-08', " +
                    "CAST('ela ma kot' AS VARCHAR), " +
                    "CAST('ela ma ko' AS VARCHAR(10)), " +
                    "CAST('ela m     ' AS CHAR(10)), " +
                    "false, " +
                    "CAST('cGllcyBiaW5hcm54' as VARBINARY))", tableName));

            assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(ImmutableList.of(
                    row("c_tinyint", null, 2.0, 0.4, null, "120", "127"),
                    row("c_smallint", null, 2.0, 0.4, null, "32760", "32767"),
                    row("c_int", null, 2.0, 0.4, null, "2147483640", "2147483647"),
                    row("c_bigint", null, 2.0, 0.4, null, "9223372036854775807", "9223372036854775807"),
                    row("c_float", null, 2.0, 0.4, null, "123.34", "123.345"),
                    row("c_double", null, 2.0, 0.4, null, "234.56", "235.567"),
                    row("c_decimal", null, 2.0, 0.4, null, "343.0", "346.0"),
                    row("c_decimal_w_params", null, 2.0, 0.4, null, "345.67", "345.678"),
                    row("c_timestamp", null, 2.0, 0.4, null, null, null),
                    isHiveVersionBefore12()
                            ? row("c_date", null, null, null, null, null, null)
                            : row("c_date", null, 2.0, 0.4, null, "2015-05-08", "2015-06-10"),
                    row("c_string", 32.0, 2.0, 0.4, null, null, null),
                    row("c_varchar", 29.0, 2.0, 0.4, null, null, null),
                    row("c_char", 17.0, 2.0, 0.4, null, null, null),
                    row("c_boolean", null, 2.0, 0.4, null, null, null),
                    row("c_binary", 39.0, null, 0.4, null, null, null),
                    row(null, null, null, null, 5.0, null, null)));
        }
        finally {
            query(format("DROP TABLE IF EXISTS %s", tableName));
        }
    }

    @Test
    @Requires(AllTypesTable.class)
    public void testComputePartitionStatisticsOnCreateTable()
    {
        String tableName = "test_compute_partition_statistics_on_create_table";
        query(format("DROP TABLE IF EXISTS %s", tableName));
        try {
            query(format("CREATE TABLE %s WITH ( " +
                    " partitioned_by = ARRAY['p_bigint', 'p_varchar']" +
                    ") AS " +
                    "SELECT * FROM ( " +
                    "    VALUES " +
                    "        (" +
                    "           TINYINT '120', " +
                    "           SMALLINT '32760', " +
                    "           INTEGER '2147483640', " +
                    "           BIGINT '9223372036854775800', " +
                    "           REAL '123.340', DOUBLE '234.560', " +
                    "           CAST(343.0 AS DECIMAL(10, 0)), " +
                    "           CAST(345.670 AS DECIMAL(10, 5)), " +
                    "           TIMESTAMP '2015-05-10 12:15:30.000', " +
                    "           DATE '2015-05-08', " +
                    "           CAST('p1 varchar' AS VARCHAR), " +
                    "           CAST('p1 varchar10' AS VARCHAR(10)), " +
                    "           CAST('p1 char10' AS CHAR(10)), false, " +
                    "           CAST('p1 binary' as VARBINARY), " +
                    "           BIGINT '1', " +
                    "           CAST('partition1' AS VARCHAR)" +
                    "        ), " +
                    "        (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '1', 'partition1'), " +
                    "        (" +
                    "           TINYINT '99', " +
                    "           SMALLINT '333'," +
                    "           INTEGER '444', " +
                    "           BIGINT '555', " +
                    "           REAL '666.340', " +
                    "           DOUBLE '777.560', " +
                    "           CAST(888.0 AS DECIMAL(10, 0)), " +
                    "           CAST(999.670 AS DECIMAL(10, 5)), " +
                    "           TIMESTAMP '2015-05-10 12:45:30.000', " +
                    "           DATE '2015-05-09', " +
                    "           CAST('p2 varchar' AS VARCHAR), " +
                    "           CAST('p2 varchar10' AS VARCHAR(10)), " +
                    "           CAST('p2 char10' AS CHAR(10)), " +
                    "           true, " +
                    "           CAST('p2 binary' as VARBINARY), " +
                    "           BIGINT '2', " +
                    "           CAST('partition2' AS VARCHAR)" +
                    "        ), " +
                    "        (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '2', 'partition2') " +
                    "" +
                    ") AS t (c_tinyint, c_smallint, c_int, c_bigint, c_float, c_double, c_decimal, c_decimal_w_params, c_timestamp, c_date, c_string, c_varchar, c_char, c_boolean, c_binary, p_bigint, p_varchar)", tableName));

            assertThat(query(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_bigint = 1 AND p_varchar = 'partition1')", tableName))).containsOnly(ImmutableList.of(
                    row("c_tinyint", null, 1.0, 0.5, null, "120", "120"),
                    row("c_smallint", null, 1.0, 0.5, null, "32760", "32760"),
                    row("c_int", null, 1.0, 0.5, null, "2147483640", "2147483640"),
                    row("c_bigint", null, 1.0, 0.5, null, "9223372036854775807", "9223372036854775807"),
                    row("c_float", null, 1.0, 0.5, null, "123.34", "123.34"),
                    row("c_double", null, 1.0, 0.5, null, "234.56", "234.56"),
                    row("c_decimal", null, 1.0, 0.5, null, "343.0", "343.0"),
                    row("c_decimal_w_params", null, 1.0, 0.5, null, "345.67", "345.67"),
                    row("c_timestamp", null, 1.0, 0.5, null, null, null),
                    isHiveVersionBefore12()
                            ? row("c_date", null, null, null, null, null, null)
                            : row("c_date", null, 1.0, 0.5, null, "2015-05-08", "2015-05-08"),
                    row("c_string", 10.0, 1.0, 0.5, null, null, null),
                    row("c_varchar", 10.0, 1.0, 0.5, null, null, null),
                    row("c_char", 9.0, 1.0, 0.5, null, null, null),
                    row("c_boolean", null, 1.0, 0.5, null, null, null),
                    row("c_binary", 9.0, null, 0.5, null, null, null),
                    row("p_bigint", null, 1.0, 0.0, null, "1", "1"),
                    row("p_varchar", 20.0, 1.0, 0.0, null, null, null),
                    row(null, null, null, null, 2.0, null, null)));

            assertThat(query(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_bigint = 2 AND p_varchar = 'partition2')", tableName))).containsOnly(ImmutableList.of(
                    row("c_tinyint", null, 1.0, 0.5, null, "99", "99"),
                    row("c_smallint", null, 1.0, 0.5, null, "333", "333"),
                    row("c_int", null, 1.0, 0.5, null, "444", "444"),
                    row("c_bigint", null, 1.0, 0.5, null, "555", "555"),
                    row("c_float", null, 1.0, 0.5, null, "666.34", "666.34"),
                    row("c_double", null, 1.0, 0.5, null, "777.56", "777.56"),
                    row("c_decimal", null, 1.0, 0.5, null, "888.0", "888.0"),
                    row("c_decimal_w_params", null, 1.0, 0.5, null, "999.67", "999.67"),
                    row("c_timestamp", null, 1.0, 0.5, null, null, null),
                    isHiveVersionBefore12()
                            ? row("c_date", null, null, null, null, null, null)
                            : row("c_date", null, 1.0, 0.5, null, "2015-05-09", "2015-05-09"),
                    row("c_string", 10.0, 1.0, 0.5, null, null, null),
                    row("c_varchar", 10.0, 1.0, 0.5, null, null, null),
                    row("c_char", 9.0, 1.0, 0.5, null, null, null),
                    row("c_boolean", null, 1.0, 0.5, null, null, null),
                    row("c_binary", 9.0, null, 0.5, null, null, null),
                    row("p_bigint", null, 1.0, 0.0, null, "2", "2"),
                    row("p_varchar", 20.0, 1.0, 0.0, null, null, null),
                    row(null, null, null, null, 2.0, null, null)));
        }
        finally {
            query(format("DROP TABLE IF EXISTS %s", tableName));
        }
    }

    @Test
    @Requires(AllTypesTable.class)
    public void testComputePartitionStatisticsOnInsert()
    {
        String tableName = "test_compute_partition_statistics_on_insert";

        query(format("DROP TABLE IF EXISTS %s", tableName));
        try {
            query(format("CREATE TABLE %s(" +
                    "c_tinyint            TINYINT, " +
                    "c_smallint           SMALLINT, " +
                    "c_int                INT, " +
                    "c_bigint             BIGINT, " +
                    "c_float              REAL, " +
                    "c_double             DOUBLE, " +
                    "c_decimal            DECIMAL(10,0), " +
                    "c_decimal_w_params   DECIMAL(10,5), " +
                    "c_timestamp          TIMESTAMP, " +
                    "c_date               DATE, " +
                    "c_string             VARCHAR, " +
                    "c_varchar            VARCHAR(10), " +
                    "c_char               CHAR(10), " +
                    "c_boolean            BOOLEAN, " +
                    "c_binary             VARBINARY, " +
                    "p_bigint             BIGINT, " +
                    "p_varchar            VARCHAR " +
                    ") WITH ( " +
                    " partitioned_by = ARRAY['p_bigint', 'p_varchar']" +
                    ")", tableName));

            query(format("INSERT INTO %s VALUES " +
                    "(TINYINT '120', SMALLINT '32760', INTEGER '2147483640', BIGINT '9223372036854775800', REAL '123.340', DOUBLE '234.560', CAST(343.0 AS DECIMAL(10, 0)), CAST(345.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:15:30', DATE '2015-05-08', 'p1 varchar', CAST('p1 varchar10' AS VARCHAR(10)), CAST('p1 char10' AS CHAR(10)), false, CAST('p1 binary' as VARBINARY), BIGINT '1', 'partition1')," +
                    "(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '1', 'partition1')", tableName));

            query(format("INSERT INTO %s VALUES " +
                    "(TINYINT '99', SMALLINT '333', INTEGER '444', BIGINT '555', REAL '666.340', DOUBLE '777.560', CAST(888.0 AS DECIMAL(10, 0)), CAST(999.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:45:30', DATE '2015-05-09', 'p2 varchar', CAST('p2 varchar10' AS VARCHAR(10)), CAST('p2 char10' AS CHAR(10)), true, CAST('p2 binary' as VARBINARY), BIGINT '2', 'partition2')," +
                    "(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '2', 'partition2')", tableName));

            String showStatsPartitionOne = format("SHOW STATS FOR (SELECT * FROM %s WHERE p_bigint = 1 AND p_varchar = 'partition1')", tableName);
            String showStatsPartitionTwo = format("SHOW STATS FOR (SELECT * FROM %s WHERE p_bigint = 2 AND p_varchar = 'partition2')", tableName);

            assertThat(query(showStatsPartitionOne)).containsOnly(ImmutableList.of(
                    row("c_tinyint", null, 1.0, 0.5, null, "120", "120"),
                    row("c_smallint", null, 1.0, 0.5, null, "32760", "32760"),
                    row("c_int", null, 1.0, 0.5, null, "2147483640", "2147483640"),
                    row("c_bigint", null, 1.0, 0.5, null, "9223372036854775807", "9223372036854775807"),
                    row("c_float", null, 1.0, 0.5, null, "123.34", "123.34"),
                    row("c_double", null, 1.0, 0.5, null, "234.56", "234.56"),
                    row("c_decimal", null, 1.0, 0.5, null, "343.0", "343.0"),
                    row("c_decimal_w_params", null, 1.0, 0.5, null, "345.67", "345.67"),
                    row("c_timestamp", null, 1.0, 0.5, null, null, null),
                    isHiveVersionBefore12()
                            ? row("c_date", null, null, null, null, null, null)
                            : row("c_date", null, 1.0, 0.5, null, "2015-05-08", "2015-05-08"),
                    row("c_string", 10.0, 1.0, 0.5, null, null, null),
                    row("c_varchar", 10.0, 1.0, 0.5, null, null, null),
                    row("c_char", 9.0, 1.0, 0.5, null, null, null),
                    row("c_boolean", null, 1.0, 0.5, null, null, null),
                    row("c_binary", 9.0, null, 0.5, null, null, null),
                    row("p_bigint", null, 1.0, 0.0, null, "1", "1"),
                    row("p_varchar", 20.0, 1.0, 0.0, null, null, null),
                    row(null, null, null, null, 2.0, null, null)));

            assertThat(query(showStatsPartitionTwo)).containsOnly(ImmutableList.of(
                    row("c_tinyint", null, 1.0, 0.5, null, "99", "99"),
                    row("c_smallint", null, 1.0, 0.5, null, "333", "333"),
                    row("c_int", null, 1.0, 0.5, null, "444", "444"),
                    row("c_bigint", null, 1.0, 0.5, null, "555", "555"),
                    row("c_float", null, 1.0, 0.5, null, "666.34", "666.34"),
                    row("c_double", null, 1.0, 0.5, null, "777.56", "777.56"),
                    row("c_decimal", null, 1.0, 0.5, null, "888.0", "888.0"),
                    row("c_decimal_w_params", null, 1.0, 0.5, null, "999.67", "999.67"),
                    row("c_timestamp", null, 1.0, 0.5, null, null, null),
                    isHiveVersionBefore12()
                            ? row("c_date", null, null, null, null, null, null)
                            : row("c_date", null, 1.0, 0.5, null, "2015-05-09", "2015-05-09"),
                    row("c_string", 10.0, 1.0, 0.5, null, null, null),
                    row("c_varchar", 10.0, 1.0, 0.5, null, null, null),
                    row("c_char", 9.0, 1.0, 0.5, null, null, null),
                    row("c_boolean", null, 1.0, 0.5, null, null, null),
                    row("c_binary", 9.0, null, 0.5, null, null, null),
                    row("p_bigint", null, 1.0, 0.0, null, "2", "2"),
                    row("p_varchar", 20.0, 1.0, 0.0, null, null, null),
                    row(null, null, null, null, 2.0, null, null)));

            query(format("INSERT INTO %s VALUES( TINYINT '119', SMALLINT '32759', INTEGER '2147483639', BIGINT '9223372036854775799', REAL '122.340', DOUBLE '233.560', CAST(342.0 AS DECIMAL(10, 0)), CAST(344.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:15:29', DATE '2015-05-07', 'p1 varchar', CAST('p1 varchar10' AS VARCHAR(10)), CAST('p1 char10' AS CHAR(10)), true, CAST('p1 binary' as VARBINARY), BIGINT '1', 'partition1')", tableName));
            query(format("INSERT INTO %s VALUES( null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '1', 'partition1')", tableName));

            assertThat(query(showStatsPartitionOne)).containsOnly(ImmutableList.of(
                    row("c_tinyint", null, 1.0, 0.5, null, "119", "120"),
                    row("c_smallint", null, 1.0, 0.5, null, "32759", "32760"),
                    row("c_int", null, 1.0, 0.5, null, "2147483639", "2147483640"),
                    row("c_bigint", null, 1.0, 0.5, null, "9223372036854775807", "9223372036854775807"),
                    row("c_float", null, 1.0, 0.5, null, "122.34", "123.34"),
                    row("c_double", null, 1.0, 0.5, null, "233.56", "234.56"),
                    row("c_decimal", null, 1.0, 0.5, null, "342.0", "343.0"),
                    row("c_decimal_w_params", null, 1.0, 0.5, null, "344.67", "345.67"),
                    row("c_timestamp", null, 1.0, 0.5, null, null, null),
                    isHiveVersionBefore12()
                            ? row("c_date", null, null, null, null, null, null)
                            : row("c_date", null, 1.0, 0.5, null, "2015-05-07", "2015-05-08"),
                    row("c_string", 20.0, 1.0, 0.5, null, null, null),
                    row("c_varchar", 20.0, 1.0, 0.5, null, null, null),
                    row("c_char", 18.0, 1.0, 0.5, null, null, null),
                    row("c_boolean", null, 2.0, 0.5, null, null, null),
                    row("c_binary", 18.0, null, 0.5, null, null, null),
                    row("p_bigint", null, 1.0, 0.0, null, "1", "1"),
                    row("p_varchar", 40.0, 1.0, 0.0, null, null, null),
                    row(null, null, null, null, 4.0, null, null)));

            query(format("INSERT INTO %s VALUES( TINYINT '100', SMALLINT '334', INTEGER '445', BIGINT '556', REAL '667.340', DOUBLE '778.560', CAST(889.0 AS DECIMAL(10, 0)), CAST(1000.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:45:31', DATE '2015-05-10', CAST('p2 varchar' AS VARCHAR), CAST('p2 varchar10' AS VARCHAR(10)), CAST('p2 char10' AS CHAR(10)), true, CAST('p2 binary' as VARBINARY), BIGINT '2', 'partition2')", tableName));
            query(format("INSERT INTO %s VALUES( null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '2', 'partition2')", tableName));

            assertThat(query(showStatsPartitionTwo)).containsOnly(ImmutableList.of(
                    row("c_tinyint", null, 1.0, 0.5, null, "99", "100"),
                    row("c_smallint", null, 1.0, 0.5, null, "333", "334"),
                    row("c_int", null, 1.0, 0.5, null, "444", "445"),
                    row("c_bigint", null, 1.0, 0.5, null, "555", "556"),
                    row("c_float", null, 1.0, 0.5, null, "666.34", "667.34"),
                    row("c_double", null, 1.0, 0.5, null, "777.56", "778.56"),
                    row("c_decimal", null, 1.0, 0.5, null, "888.0", "889.0"),
                    row("c_decimal_w_params", null, 1.0, 0.5, null, "999.67", "1000.67"),
                    row("c_timestamp", null, 1.0, 0.5, null, null, null),
                    isHiveVersionBefore12()
                            ? row("c_date", null, null, null, null, null, null)
                            : row("c_date", null, 1.0, 0.5, null, "2015-05-09", "2015-05-10"),
                    row("c_string", 20.0, 1.0, 0.5, null, null, null),
                    row("c_varchar", 20.0, 1.0, 0.5, null, null, null),
                    row("c_char", 18.0, 1.0, 0.5, null, null, null),
                    row("c_boolean", null, 1.0, 0.5, null, null, null),
                    row("c_binary", 18.0, null, 0.5, null, null, null),
                    row("p_bigint", null, 1.0, 0.0, null, "2", "2"),
                    row("p_varchar", 40.0, 1.0, 0.0, null, null, null),
                    row(null, null, null, null, 4.0, null, null)));
        }
        finally {
            query(format("DROP TABLE IF EXISTS %s", tableName));
        }
    }

    @Test(dataProvider = "testComputeFloatingPointStatisticsDataProvider")
    public void testComputeFloatingPointStatistics(String dataType)
    {
        String tableName = "test_compute_floating_point_statistics";
        query("DROP TABLE IF EXISTS " + tableName);
        try {
            query(format("CREATE TABLE %1$s(c_basic %2$s, c_minmax %2$s, c_inf %2$s, c_ninf %2$s, c_nan %2$s, c_nzero %2$s)", tableName, dataType));
            query("ANALYZE " + tableName); // TODO remove after https://github.com/prestosql/presto/issues/2469

            query(format(
                    "INSERT INTO %1$s(c_basic, c_minmax, c_inf, c_ninf, c_nan, c_nzero) VALUES " +
                            "  (%2$s '42.3', %2$s '576234.567',  %2$s 'Infinity', %2$s '-Infinity', %2$s 'NaN', %2$s '-0')," +
                            "  (%2$s '42.3', %2$s '-1234567.89', %2$s '-15', %2$s '45', %2$s '12345', %2$s '-47'), " +
                            "  (NULL, NULL, NULL, NULL, NULL, NULL)",
                    tableName,
                    dataType));

            List expectedStatistics = ImmutableList.of(
                    row("c_basic", null, 1., 0.33333333333, null, "42.3", "42.3"),
                    Objects.equals(dataType, "double")
                            ? row("c_minmax", null, 2., 0.33333333333, null, "-1234567.89", "576234.567")
                            : row("c_minmax", null, 2., 0.33333333333, null, "-1234567.9", "576234.56"),
                    row("c_inf", null, 2., 0.33333333333, null, null, null), // -15, +inf
                    row("c_ninf", null, 2., 0.33333333333, null, null, null), // -inf, 45
                    row("c_nan", null, 2., 0.33333333333, null, "12345.0", "12345.0"), // NaN is ignored by min/max
                    row("c_nzero", null, 2., 0.33333333333, null, "-47.0", "0.0"),
                    row(null, null, null, null, 3., null, null));

            assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(expectedStatistics);

            query("ANALYZE " + tableName);
            assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(expectedStatistics);
        }
        finally {
            query("DROP TABLE IF EXISTS " + tableName);
        }
    }

    @DataProvider
    public Object[][] testComputeFloatingPointStatisticsDataProvider()
    {
        return new Object[][] {
                {"real"},
                {"double"},
        };
    }

    @Test
    public void testMixedHiveAndPrestoStatistics()
    {
        String tableName = "test_mixed_hive_and_presto_statistics";
        onHive().executeQuery(format("DROP TABLE IF EXISTS %s", tableName));
        onHive().executeQuery(format("CREATE TABLE %s (a INT) PARTITIONED BY (p INT) STORED AS ORC TBLPROPERTIES ('transactional' = 'false')", tableName));

        try {
            onHive().executeQuery(format("INSERT OVERWRITE TABLE %s PARTITION (p=1) VALUES (1),(2),(3),(4)", tableName));
            onHive().executeQuery(format("INSERT OVERWRITE TABLE %s PARTITION (p=2) VALUES (10),(11),(12)", tableName));

            String showStatsPartitionOne = format("SHOW STATS FOR (SELECT * FROM %s WHERE p = 1)", tableName);
            String showStatsPartitionTwo = format("SHOW STATS FOR (SELECT * FROM %s WHERE p = 2)", tableName);
            String showStatsWholeTable = format("SHOW STATS FOR %s", tableName);

            // drop all stats; which could have been created on insert
            query(format("CALL system.drop_stats('default', '%s')", tableName));

            // sanity check that there are no statistics
            assertThat(query(showStatsPartitionOne)).containsOnly(
                    row("p", null, null, null, null, null, null),
                    row("a", null, null, null, null, null, null),
                    row(null, null, null, null, null, null, null));
            assertThat(query(showStatsPartitionTwo)).containsOnly(
                    row("p", null, null, null, null, null, null),
                    row("a", null, null, null, null, null, null),
                    row(null, null, null, null, null, null, null));
            assertThat(query(showStatsWholeTable)).containsOnly(
                    row("p", null, null, null, null, null, null),
                    row("a", null, null, null, null, null, null),
                    row(null, null, null, null, null, null, null));

            // analyze first partition with Presto and second with Hive
            query(format("ANALYZE %s WITH (partitions = ARRAY[ARRAY['1']])", tableName));
            onHive().executeQuery(format("ANALYZE TABLE %s PARTITION (p = \"2\") COMPUTE STATISTICS", tableName));
            onHive().executeQuery(format("ANALYZE TABLE %s PARTITION (p = \"2\") COMPUTE STATISTICS FOR COLUMNS", tableName));

            // we can get stats for individual partitions
            assertThat(query(showStatsPartitionOne)).containsOnly(
                    row("p", null, 1.0, 0.0, null, "1", "1"),
                    row("a", null, 4.0, 0.0, null, "1", "4"),
                    row(null, null, null, null, 4.0, null, null));
            assertThat(query(showStatsPartitionTwo)).containsOnly(
                    row("p", null, 1.0, 0.0, null, "2", "2"),
                    row("a", null, 3.0, 0.0, null, "10", "12"),
                    row(null, null, null, null, 3.0, null, null));

            // as well as for whole table
            assertThat(query(showStatsWholeTable)).containsOnly(
                    row("p", null, 2.0, 0.0, null, "1", "2"),
                    row("a", null, 4.0, 0.0, null, "1", "12"),
                    row(null, null, null, null, 7.0, null, null));
        }
        finally {
            onHive().executeQuery("DROP TABLE " + tableName);
        }
    }

    private static void assertComputeTableStatisticsOnCreateTable(String sourceTableName, List expectedStatistics)
    {
        String copiedTableName = "assert_compute_table_statistics_on_create_table_" + sourceTableName;
        query(format("DROP TABLE IF EXISTS %s", copiedTableName));
        try {
            query(format("CREATE TABLE %s AS SELECT * FROM %s", copiedTableName, sourceTableName));
            assertThat(query("SHOW STATS FOR " + copiedTableName)).containsOnly(expectedStatistics);
        }
        finally {
            query(format("DROP TABLE IF EXISTS %s", copiedTableName));
        }
    }

    private void assertComputeTableStatisticsOnInsert(String sourceTableName, List expectedStatistics)
    {
        String copiedTableName = "assert_compute_table_statistics_on_insert_" + sourceTableName;
        query(format("DROP TABLE IF EXISTS %s", copiedTableName));
        try {
            query(format("CREATE TABLE %s AS SELECT * FROM %s WITH NO DATA", copiedTableName, sourceTableName));
            assertThat(query("SHOW STATS FOR " + copiedTableName)).containsOnly(getAllTypesEmptyTableStatistics());
            query(format("INSERT INTO %s SELECT * FROM %s", copiedTableName, sourceTableName));
            assertThat(query("SHOW STATS FOR " + copiedTableName)).containsOnly(expectedStatistics);
        }
        finally {
            query(format("DROP TABLE IF EXISTS %s", copiedTableName));
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy