
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.tests.product.hive;
import com.google.common.collect.ImmutableList;
import com.google.inject.Inject;
import io.trino.tempto.ProductTest;
import io.trino.tempto.hadoop.hdfs.HdfsClient;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.IOException;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.tempto.assertions.QueryAssert.Row;
import static io.trino.tempto.assertions.QueryAssert.Row.row;
import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure;
import static io.trino.testing.TestingNames.randomNameSuffix;
import static io.trino.tests.product.TestGroups.HIVE_SPARK;
import static io.trino.tests.product.TestGroups.HIVE_SPARK_NO_STATS_FALLBACK;
import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS;
import static io.trino.tests.product.utils.QueryExecutors.onHive;
import static io.trino.tests.product.utils.QueryExecutors.onSpark;
import static io.trino.tests.product.utils.QueryExecutors.onTrino;
import static java.lang.String.format;
import static java.lang.String.join;
import static java.util.Collections.nCopies;
import static java.util.Locale.ENGLISH;
import static org.assertj.core.api.Assertions.assertThat;
public class TestHiveSparkCompatibility
extends ProductTest
{
// see spark-defaults.conf
private static final String TRINO_CATALOG = "hive";
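// HDFS client used to inspect the files that Spark and Hive write for the tables under test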
@Inject
private HdfsClient hdfsClient;
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS}, dataProvider = "testReadSparkCreatedTableDataProvider")
public void testReadSparkCreatedTable(String sparkTableFormat, String expectedTrinoTableFormat)
{
String sparkTableName = "spark_created_table_" + sparkTableFormat.replaceAll("[^a-zA-Z]", "").toLowerCase(ENGLISH) + "_" + randomNameSuffix();
String trinoTableName = format("%s.default.%s", TRINO_CATALOG, sparkTableName);
onSpark().executeQuery(
"CREATE TABLE default." + sparkTableName + "( " +
" a_boolean boolean, " +
" a_tinyint tinyint, " +
" a_smallint smallint, " +
" an_integer int, " +
" a_bigint bigint, " +
" a_real float, " +
" a_double double, " +
" a_short_decimal decimal(11, 4), " +
" a_long_decimal decimal(26, 7), " +
" a_string string, " +
// TODO " a_binary binary, " +
" a_date date, " +
" a_timestamp_seconds timestamp, " +
" a_timestamp_millis timestamp, " +
" a_timestamp_micros timestamp, " +
" a_timestamp_nanos timestamp, " +
// TODO interval
// TODO array
// TODO struct
// TODO map
" a_dummy string) " +
sparkTableFormat + " " +
// By default, Spark creates the table with "transactional"="true", but the table conforms to neither the Hive
// transactional format nor its file naming convention, so such a table cannot be read. As a workaround, force
// the table to be marked non-transactional.
"TBLPROPERTIES ('transactional'='false')");
// nulls
onSpark().executeQuery("INSERT INTO " + sparkTableName + " VALUES (" + join(",", nCopies(16, "NULL")) + ")");
// positive values
onSpark().executeQuery(
"INSERT INTO " + sparkTableName + " VALUES (" +
"true, " + // a_boolean
"127, " + // a_tinyint
"32767, " + // a_smallint
"1000000000, " + // an_integer
"1000000000000000, " + // a_bigint
"10000000.123, " + // a_real
"100000000000.123, " + // a_double
"CAST('1234567.8901' AS decimal(11, 4)), " + // a_short_decimal
"CAST('1234567890123456789.0123456' AS decimal(26, 7)), " + // a_short_decimal
"'some string', " + // a_string
"DATE '2005-09-10', " + // a_date
"TIMESTAMP '2005-09-10 13:00:00', " + // a_timestamp_seconds
"TIMESTAMP '2005-09-10 13:00:00.123', " + // a_timestamp_millis
"TIMESTAMP '2005-09-10 13:00:00.123456', " + // a_timestamp_micros
"TIMESTAMP '2005-09-10 13:00:00.123456789', " + // a_timestamp_nanos
"'dummy')");
// negative values
onSpark().executeQuery(
"INSERT INTO " + sparkTableName + " VALUES (" +
"false, " + // a_boolean
"-128, " + // a_tinyint
"-32768, " + // a_smallint
"-1000000012, " + // an_integer
"-1000000000000012, " + // a_bigint
"-10000000.123, " + // a_real
"-100000000000.123, " + // a_double
"CAST('-1234567.8901' AS decimal(11, 4)), " + // a_short_decimal
"CAST('-1234567890123456789.0123456' AS decimal(26, 7)), " + // a_short_decimal
"'', " + // a_string
"DATE '1965-09-10', " + // a_date
"TIMESTAMP '1965-09-10 13:00:00', " + // a_timestamp_seconds
"TIMESTAMP '1965-09-10 13:00:00.123', " + // a_timestamp_millis
"TIMESTAMP '1965-09-10 13:00:00.123456', " + // a_timestamp_micros
"TIMESTAMP '1965-09-10 13:00:00.123456789', " + // a_timestamp_nanos
"'dummy')");
List<Row> expected = List.of(
row(nCopies(16, null).toArray()),
row(
true, // a_boolean
(byte) 127, // a_tinyint
(short) 32767, // a_smallint
1000000000, // an_integer
1000000000000000L, // a_bigint
10000000.123F, // a_real
100000000000.123, // a_double
new BigDecimal("1234567.8901"), // a_short_decimal
new BigDecimal("1234567890123456789.0123456"), // a_long_decimal
"some string", // a_string
java.sql.Date.valueOf(LocalDate.of(2005, 9, 10)), // a_date
java.sql.Timestamp.valueOf(LocalDateTime.of(2005, 9, 10, 13, 0, 0)), // a_timestamp_seconds
java.sql.Timestamp.valueOf(LocalDateTime.of(2005, 9, 10, 13, 0, 0, 123_000_000)), // a_timestamp_millis
java.sql.Timestamp.valueOf(LocalDateTime.of(2005, 9, 10, 13, 0, 0, 123_456_000)), // a_timestamp_micros
java.sql.Timestamp.valueOf(LocalDateTime.of(2005, 9, 10, 13, 0, 0, 123_456_000)), // a_timestamp_nanos; note that Spark timestamp has microsecond precision
"dummy"),
row(
false, // a_boolean
(byte) -128, // a_tinyint
(short) -32768, // a_smallint
-1000000012, // an_integer
-1000000000000012L, // a_bigint
-10000000.123F, // a_real
-100000000000.123, // a_double
new BigDecimal("-1234567.8901"), // a_short_decimal
new BigDecimal("-1234567890123456789.0123456"), // a_long_decimal
"", // a_string
java.sql.Date.valueOf(LocalDate.of(1965, 9, 10)), // a_date
java.sql.Timestamp.valueOf(LocalDateTime.of(1965, 9, 10, 13, 0, 0)), // a_timestamp_seconds
java.sql.Timestamp.valueOf(LocalDateTime.of(1965, 9, 10, 13, 0, 0, 123_000_000)), // a_timestamp_millis
java.sql.Timestamp.valueOf(LocalDateTime.of(1965, 9, 10, 13, 0, 0, 123_456_000)), // a_timestamp_micros
java.sql.Timestamp.valueOf(LocalDateTime.of(1965, 9, 10, 13, 0, 0, 123_456_000)), // a_timestamp_nanos; note that Spark timestamp has microsecond precision
"dummy"));
assertThat(onSpark().executeQuery("SELECT * FROM " + sparkTableName)).containsOnly(expected);
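// hive.timestamp_precision defaults to milliseconds in Trino; raise it so the sub-millisecond digits stored by Spark survive the read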
onTrino().executeQuery("SET SESSION hive.timestamp_precision = 'NANOSECONDS'");
assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(expected);
assertThat(onTrino().executeQuery("SHOW CREATE TABLE " + trinoTableName))
.containsOnly(row(format(
"CREATE TABLE %s (\n" +
" a_boolean boolean,\n" +
" a_tinyint tinyint,\n" +
" a_smallint smallint,\n" +
" an_integer integer,\n" +
" a_bigint bigint,\n" +
" a_real real,\n" +
" a_double double,\n" +
" a_short_decimal decimal(11, 4),\n" +
" a_long_decimal decimal(26, 7),\n" +
" a_string varchar,\n" +
" a_date date,\n" +
" a_timestamp_seconds timestamp(9),\n" +
" a_timestamp_millis timestamp(9),\n" +
" a_timestamp_micros timestamp(9),\n" +
" a_timestamp_nanos timestamp(9),\n" +
" a_dummy varchar\n" +
")\n" +
"WITH (\n" +
" format = '%s'\n" +
")",
trinoTableName,
expectedTrinoTableFormat)));
onSpark().executeQuery("DROP TABLE " + sparkTableName);
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS}, dataProvider = "sparkParquetTimestampFormats")
public void testSparkParquetTimestampCompatibility(String sparkTimestampFormat, String sparkTimestamp, String[] expectedValues)
{
String sparkTableName = "test_spark_parquet_timestamp_compatibility_" + sparkTimestampFormat.toLowerCase(ENGLISH) + "_" + randomNameSuffix();
String trinoTableName = format("%s.default.%s", TRINO_CATALOG, sparkTableName);
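// spark.sql.parquet.outputTimestampType selects the physical Parquet timestamp representation Spark writes (INT96, TIMESTAMP_MICROS or TIMESTAMP_MILLIS)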
onSpark().executeQuery("SET spark.sql.parquet.outputTimestampType = " + sparkTimestampFormat);
onSpark().executeQuery(
"CREATE TABLE default." + sparkTableName + "(a_timestamp timestamp) " +
"USING PARQUET " +
"TBLPROPERTIES ('transactional'='false')");
onSpark().executeQuery("INSERT INTO " + sparkTableName + " VALUES (TIMESTAMP '" + sparkTimestamp + "')");
for (int i = 0; i < HIVE_TIMESTAMP_PRECISIONS.length; i++) {
String trinoTimestampPrecision = HIVE_TIMESTAMP_PRECISIONS[i];
String expected = expectedValues[i];
onTrino().executeQuery("SET SESSION hive.timestamp_precision = '" + trinoTimestampPrecision + "'");
assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(row(java.sql.Timestamp.valueOf(expected)));
assertThat(onTrino().executeQuery("SELECT count(*) FROM " + trinoTableName + " WHERE a_timestamp = TIMESTAMP '" + expected + "'")).containsOnly(row(1));
assertThat(onTrino().executeQuery("SELECT count(*) FROM " + trinoTableName + " WHERE a_timestamp != TIMESTAMP '" + expected + "'")).containsOnly(row(0));
}
onSpark().executeQuery("DROP TABLE " + sparkTableName);
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testSparkClusteringCaseSensitiveCompatibility()
{
String sparkTableNameWithClusteringDifferentCase = "test_spark_clustering_case_sensitive_" + randomNameSuffix();
onSpark().executeQuery(
String.format("CREATE TABLE %s (row_id int, `segment_id` int, value long) ", sparkTableNameWithClusteringDifferentCase) +
"USING PARQUET " +
"PARTITIONED BY (`part` string) " +
"CLUSTERED BY (`SEGMENT_ID`) " +
" SORTED BY (`SEGMENT_ID`) " +
" INTO 10 BUCKETS");
onSpark().executeQuery(format("INSERT INTO %s ", sparkTableNameWithClusteringDifferentCase) +
"VALUES " +
" (1, 1, 100, 'part1')," +
" (100, 1, 123, 'part2')," +
" (101, 2, 202, 'part2')");
// Ensure that Trino can successfully read from the Spark bucketed table even though the clustering
// column `SEGMENT_ID` is in a different case than the data column `segment_id`
assertThat(onTrino().executeQuery(format("SELECT * FROM %s.default.%s", TRINO_CATALOG, sparkTableNameWithClusteringDifferentCase)))
.containsOnly(List.of(
row(1, 1, 100, "part1"),
row(100, 1, 123, "part2"),
row(101, 2, 202, "part2")));
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testSparkParquetBloomFilterCompatibility()
{
String sparkTableNameWithBloomFilter = "test_spark_parquet_bloom_filter_compatibility_enabled_" + randomNameSuffix();
String sparkTableNameNoBloomFilter = "test_spark_parquet_bloom_filter_compatibility_disabled_" + randomNameSuffix();
// Disable dictionary encoding so that bloom filter statistics, rather than dictionary filtering, are exercised by the predicates below
onSpark().executeQuery(
String.format("CREATE TABLE %s (testInteger INT, testLong Long, testString STRING, testDouble DOUBLE, testFloat FLOAT) ", sparkTableNameWithBloomFilter) +
"USING PARQUET OPTIONS (" +
"'parquet.bloom.filter.enabled'='true'," +
"'parquet.enable.dictionary'='false'" +
")");
onSpark().executeQuery(
String.format("CREATE TABLE %s (testInteger INT, testLong Long, testString STRING, testDouble DOUBLE, testFloat FLOAT) ", sparkTableNameNoBloomFilter) +
"USING PARQUET OPTIONS (" +
"'parquet.bloom.filter.enabled'='false'," +
"'parquet.enable.dictionary'='false'" +
")");
String[] sparkTables = new String[] {sparkTableNameWithBloomFilter, sparkTableNameNoBloomFilter};
String[] trinoTables = new String[] {
format("%s.default.%s", TRINO_CATALOG, sparkTableNameWithBloomFilter),
format("%s.default.%s", TRINO_CATALOG, sparkTableNameNoBloomFilter)};
// Control the number of Spark output files via a hint: https://issues.apache.org/jira/browse/SPARK-24940
// The inserted values include 'aaaaaaaaaaa' and 'zzzzzzzzzzz' so that file-level min/max statistics alone cannot satisfy the predicates
for (String sparkTable : sparkTables) {
onSpark().executeQuery(format(
"INSERT INTO %s " +
"SELECT /*+ REPARTITION(1) */ testInteger, testLong, testString, testDouble, testFloat FROM VALUES " +
" (-999999, -999999, 'aaaaaaaaaaa', -9999999999.99D, -9999999.9999F)" +
", (3, 30, 'fdsvxxbv33cb', 97662.2D, 98862.2F)" +
", (5324, 2466, 'refgfdfrexx', 8796.1D, -65496.1F)" +
", (999999, 9999999999999, 'zzzzzzzzzzz', 9999999999.99D, -9999999.9999F)" +
", (9444, 4132455, 'ff34322vxff', 32137758.7892D, 9978.129887F) AS DATA(testInteger, testLong, testString, testDouble, testFloat)",
sparkTable));
}
// Explicitly enable bloom filter usage
onTrino().executeQuery("set session hive.parquet_use_bloom_filter=true");
assertTrinoBloomFilterTableSelectResult(trinoTables);
// Explicitly disable bloom filter usage; the results must be identical
onTrino().executeQuery("set session hive.parquet_use_bloom_filter=false");
assertTrinoBloomFilterTableSelectResult(trinoTables);
for (String sparkTable : sparkTables) {
onSpark().executeQuery("DROP TABLE " + sparkTable);
}
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testTrinoSparkParquetBloomFilterCompatibility()
{
String trinoTableNameWithBloomFilter = "test_trino_spark_parquet_bloom_filter_compatibility_enabled_" + randomNameSuffix();
String trinoTableNameNoBloomFilter = "test_trino_spark_parquet_bloom_filter_compatibility_disabled_" + randomNameSuffix();
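// The parquet_bloom_filter_columns table property makes Trino write Parquet bloom filters for the listed columns; the second table is created without it as a control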
onTrino().executeQuery(
String.format("CREATE TABLE %s.default.%s (testInteger INTEGER, testLong BIGINT, testString VARCHAR, testDouble DOUBLE, testFloat REAL) ", TRINO_CATALOG, trinoTableNameWithBloomFilter) +
"WITH (" +
"format = 'PARQUET'," +
"parquet_bloom_filter_columns = ARRAY['testInteger', 'testLong', 'testString', 'testDouble', 'testFloat']" +
")");
onTrino().executeQuery(
String.format("CREATE TABLE %s.default.%s (testInteger INTEGER, testLong BIGINT, testString VARCHAR, testDouble DOUBLE, testFloat REAL) WITH (FORMAT = 'PARQUET')", TRINO_CATALOG, trioTableNameNoBloomFilter));
String[] sparkTables = new String[] {trinoTableNameWithBloomFilter, trinoTableNameNoBloomFilter};
String[] trinoTables = new String[] {
format("%s.default.%s", TRINO_CATALOG, trinoTableNameWithBloomFilter),
format("%s.default.%s", TRINO_CATALOG, trioTableNameNoBloomFilter)};
for (String trinoTable : trinoTables) {
onTrino().executeQuery(format(
"INSERT INTO %s " +
"SELECT testInteger, testLong, testString, testDouble, testFloat FROM (VALUES " +
" (-999999, -999999, 'aaaaaaaaaaa', DOUBLE '-9999999999.99', REAL '-9999999.9999')" +
", (3, 30, 'fdsvxxbv33cb', DOUBLE '97662.2', REAL '98862.2')" +
", (5324, 2466, 'refgfdfrexx', DOUBLE '8796.1', REAL '-65496.1')" +
", (999999, 9999999999999, 'zzzzzzzzzzz', DOUBLE '9999999999.99', REAL '-9999999.9999')" +
", (9444, 4132455, 'ff34322vxff', DOUBLE '32137758.7892', REAL '9978.129887')) AS DATA(testInteger, testLong, testString, testDouble, testFloat)",
trinoTable));
}
assertSparkBloomFilterTableSelectResult(sparkTables);
for (String trinoTable : trinoTables) {
onTrino().executeQuery("DROP TABLE " + trinoTable);
}
}
private static void assertTrinoBloomFilterTableSelectResult(String[] trinoTables)
{
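// Each IN list contains exactly one value that is present in the table, so the count must be 1 whether or not bloom filters are consulted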
for (String trinoTable : trinoTables) {
assertThat(onTrino().executeQuery("SELECT COUNT(*) FROM " + trinoTable + " WHERE testInteger IN (9444, -88777, 6711111)")).containsOnly(List.of(row(1)));
assertThat(onTrino().executeQuery("SELECT COUNT(*) FROM " + trinoTable + " WHERE testLong IN (4132455, 321324, 312321321322)")).containsOnly(List.of(row(1)));
assertThat(onTrino().executeQuery("SELECT COUNT(*) FROM " + trinoTable + " WHERE testString IN ('fdsvxxbv33cb', 'cxxx322', 'cxxx323')")).containsOnly(List.of(row(1)));
assertThat(onTrino().executeQuery("SELECT COUNT(*) FROM " + trinoTable + " WHERE testDouble IN (DOUBLE '97662.2', DOUBLE '-97221.2', DOUBLE '-88777.22233')")).containsOnly(List.of(row(1)));
assertThat(onTrino().executeQuery("SELECT COUNT(*) FROM " + trinoTable + " WHERE testFloat IN (REAL '-65496.1', REAL '98211862.2', REAL '6761111555.1222')")).containsOnly(List.of(row(1)));
}
}
private static void assertSparkBloomFilterTableSelectResult(String[] sparkTables)
{
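// Same single-match predicates as the Trino-side assertion, executed through Spark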
for (String sparkTable : sparkTables) {
assertThat(onSpark().executeQuery("SELECT COUNT(*) FROM " + sparkTable + " WHERE testInteger IN (9444, -88777, 6711111)")).containsOnly(List.of(row(1)));
assertThat(onSpark().executeQuery("SELECT COUNT(*) FROM " + sparkTable + " WHERE testLong IN (4132455, 321324, 312321321322)")).containsOnly(List.of(row(1)));
assertThat(onSpark().executeQuery("SELECT COUNT(*) FROM " + sparkTable + " WHERE testString IN ('fdsvxxbv33cb', 'cxxx322', 'cxxx323')")).containsOnly(List.of(row(1)));
assertThat(onSpark().executeQuery("SELECT COUNT(*) FROM " + sparkTable + " WHERE testDouble IN (97662.2D, -97221.2D, -88777.22233D)")).containsOnly(List.of(row(1)));
assertThat(onSpark().executeQuery("SELECT COUNT(*) FROM " + sparkTable + " WHERE testFloat IN (-65496.1F, 98211862.2F, 6761111555.1222F)")).containsOnly(List.of(row(1)));
}
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testInsertFailsOnBucketedTableCreatedBySpark()
{
String hiveTableName = "spark_insert_bucketed_table_" + randomNameSuffix();
onSpark().executeQuery(
"CREATE TABLE default." + hiveTableName + "(a_key integer, a_value integer) " +
"USING PARQUET " +
"CLUSTERED BY (a_key) INTO 3 BUCKETS");
assertQueryFailure(() -> onTrino().executeQuery("INSERT INTO default." + hiveTableName + " VALUES (1, 100)"))
.hasMessageContaining("Inserting into Spark bucketed tables is not supported");
onSpark().executeQuery("DROP TABLE " + hiveTableName);
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testUpdateFailsOnBucketedTableCreatedBySpark()
{
String hiveTableName = "spark_update_bucketed_table_" + randomNameSuffix();
onSpark().executeQuery(
"CREATE TABLE default." + hiveTableName + "(a_key integer, a_value integer) " +
"USING ORC " +
"CLUSTERED BY (a_key) INTO 3 BUCKETS");
assertQueryFailure(() -> onTrino().executeQuery("UPDATE default." + hiveTableName + " SET a_value = 100 WHERE a_key = 1"))
.hasMessageContaining("Merging into Spark bucketed tables is not supported");
onSpark().executeQuery("DROP TABLE " + hiveTableName);
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testDeleteFailsOnBucketedTableCreatedBySpark()
{
String hiveTableName = "spark_delete_bucketed_table_" + randomNameSuffix();
onSpark().executeQuery(
"CREATE TABLE default." + hiveTableName + "(a_key integer, a_value integer) " +
"USING ORC " +
"CLUSTERED BY (a_key) INTO 3 BUCKETS");
assertQueryFailure(() -> onTrino().executeQuery("DELETE FROM default." + hiveTableName + " WHERE a_key = 1"))
.hasMessageContaining("Merging into Spark bucketed tables is not supported");
onSpark().executeQuery("DROP TABLE " + hiveTableName);
}
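// Values accepted by the hive.timestamp_precision session property, ordered from lowest to highest precision (matches the ordering used by sparkParquetTimestampFormats)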
private static final String[] HIVE_TIMESTAMP_PRECISIONS = new String[] {"MILLISECONDS", "MICROSECONDS", "NANOSECONDS"};
@DataProvider
public static Object[][] sparkParquetTimestampFormats()
{
String millisTimestamp = "2005-09-10 13:00:00.123";
String microsTimestamp = "2005-09-10 13:00:00.123456";
String nanosTimestamp = "2005-09-10 13:00:00.123456789";
// Ordering of expected values matches the ordering in HIVE_TIMESTAMP_PRECISIONS
return new Object[][] {
{"TIMESTAMP_MILLIS", millisTimestamp, new String[] {millisTimestamp, millisTimestamp, millisTimestamp}},
{"TIMESTAMP_MICROS", microsTimestamp, new String[] {millisTimestamp, microsTimestamp, microsTimestamp}},
// note that Spark timestamp has microsecond precision
{"INT96", nanosTimestamp, new String[] {millisTimestamp, microsTimestamp, microsTimestamp}},
};
}
@DataProvider
public static Object[][] testReadSparkCreatedTableDataProvider()
{
return new Object[][] {
{"USING ORC", "ORC"},
{"USING PARQUET", "PARQUET"},
// TODO add Avro
};
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testReadTrinoCreatedOrcTable()
{
testReadTrinoCreatedTable("using_orc", "ORC");
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testReadTrinoCreatedParquetTable()
{
testReadTrinoCreatedTable("using_parquet", "PARQUET");
}
private void testReadTrinoCreatedTable(String tableName, String tableFormat)
{
String sparkTableName = "trino_created_table_" + tableName + "_" + randomNameSuffix();
String trinoTableName = format("%s.default.%s", TRINO_CATALOG, sparkTableName);
// Spark timestamps are in microsecond precision
onTrino().executeQuery("SET SESSION hive.timestamp_precision = 'MICROSECONDS'");
onTrino().executeQuery(format(
"CREATE TABLE %s ( " +
" a_boolean boolean, " +
" a_tinyint tinyint, " +
" a_smallint smallint, " +
" an_integer integer, " +
" a_bigint bigint, " +
" a_real real, " +
" a_double double, " +
" a_short_decimal decimal(11, 4), " +
" a_long_decimal decimal(26, 7), " +
" a_string varchar, " +
// TODO binary
" a_date date, " +
" a_timestamp_seconds timestamp(6), " +
" a_timestamp_millis timestamp(6), " +
" a_timestamp_micros timestamp(6), " +
// TODO interval
// TODO array
// TODO struct
// TODO map
" a_dummy varchar " +
") " +
"WITH ( " +
" format = '%s' " +
")",
trinoTableName,
tableFormat));
// nulls
onTrino().executeQuery("INSERT INTO " + trinoTableName + " VALUES (" + join(",", nCopies(15, "NULL")) + ")");
// positive values
onTrino().executeQuery(
"INSERT INTO " + trinoTableName + " VALUES (" +
"true, " + // a_boolean
"127, " + // a_tinyint
"32767, " + // a_smallint
"1000000000, " + // an_integer
"1000000000000000, " + // a_bigint
"10000000.123, " + // a_real
"100000000000.123, " + // a_double
"CAST('1234567.8901' AS decimal(11, 4)), " + // a_short_decimal
"CAST('1234567890123456789.0123456' AS decimal(26, 7)), " + // a_short_decimal
"'some string', " + // a_string
"DATE '2005-09-10', " + // a_date
"TIMESTAMP '2005-09-10 13:00:00', " + // a_timestamp_seconds
"TIMESTAMP '2005-09-10 13:00:00.123', " + // a_timestamp_millis
"TIMESTAMP '2005-09-10 13:00:00.123456', " + // a_timestamp_micros
"'dummy')");
// negative values
onTrino().executeQuery(
"INSERT INTO " + trinoTableName + " VALUES (" +
"false, " + // a_boolean
"-128, " + // a_tinyint
"-32768, " + // a_smallint
"-1000000012, " + // an_integer
"-1000000000000012, " + // a_bigint
"-10000000.123, " + // a_real
"-100000000000.123, " + // a_double
"CAST('-1234567.8901' AS decimal(11, 4)), " + // a_short_decimal
"CAST('-1234567890123456789.0123456' AS decimal(26, 7)), " + // a_short_decimal
"'', " + // a_string
"DATE '1965-09-10', " + // a_date
"TIMESTAMP '1965-09-10 13:00:00', " + // a_timestamp_seconds
"TIMESTAMP '1965-09-10 13:00:00.123', " + // a_timestamp_millis
"TIMESTAMP '1965-09-10 13:00:00.123456', " + // a_timestamp_micros
"'dummy')");
List<Row> expected = List.of(
row(nCopies(15, null).toArray()),
row(
true, // a_boolean
(byte) 127, // a_tinyint
(short) 32767, // a_smallint
1000000000, // an_integer
1000000000000000L, // a_bigint
10000000.123F, // a_real
100000000000.123, // a_double
new BigDecimal("1234567.8901"), // a_short_decimal
new BigDecimal("1234567890123456789.0123456"), // a_long_decimal
"some string", // a_string
java.sql.Date.valueOf(LocalDate.of(2005, 9, 10)), // a_date
java.sql.Timestamp.valueOf(LocalDateTime.of(2005, 9, 10, 13, 0, 0)), // a_timestamp_seconds
java.sql.Timestamp.valueOf(LocalDateTime.of(2005, 9, 10, 13, 0, 0, 123_000_000)), // a_timestamp_millis
java.sql.Timestamp.valueOf(LocalDateTime.of(2005, 9, 10, 13, 0, 0, 123_456_000)), // a_timestamp_micros
"dummy"),
row(
false, // a_boolean
(byte) -128, // a_tinyint
(short) -32768, // a_smallint
-1000000012, // an_integer
-1000000000000012L, // a_bigint
-10000000.123F, // a_real
-100000000000.123, // a_double
new BigDecimal("-1234567.8901"), // a_short_decimal
new BigDecimal("-1234567890123456789.0123456"), // a_long_decimal
"", // a_string
java.sql.Date.valueOf(LocalDate.of(1965, 9, 10)), // a_date
java.sql.Timestamp.valueOf(LocalDateTime.of(1965, 9, 10, 13, 0, 0)), // a_timestamp_seconds
java.sql.Timestamp.valueOf(LocalDateTime.of(1965, 9, 10, 13, 0, 0, 123_000_000)), // a_timestamp_millis
java.sql.Timestamp.valueOf(LocalDateTime.of(1965, 9, 10, 13, 0, 0, 123_456_000)), // a_timestamp_micros
"dummy"));
assertThat(onSpark().executeQuery("SELECT * FROM " + sparkTableName)).containsOnly(expected);
assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(expected);
onTrino().executeQuery("DROP TABLE " + trinoTableName);
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testReadSparkDateAndTimePartitionName()
{
String sparkTableName = "test_trino_reading_spark_date_and_time_type_partitioned_" + randomNameSuffix();
String trinoTableName = format("%s.default.%s", TRINO_CATALOG, sparkTableName);
onSpark().executeQuery(format("CREATE TABLE default.%s (value integer) PARTITIONED BY (dt date)", sparkTableName));
// Spark allows creating partition values that include a time component
// Hive refuses to create such partitions, but can read them
onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='2022-04-13 00:00:00.000000000') VALUES (1)", sparkTableName));
onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='2022-04-13 00:00:00') VALUES (2)", sparkTableName));
onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='2022-04-13 00:00') VALUES (3)", sparkTableName));
onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='12345-06-07') VALUES (4)", sparkTableName));
// This INSERT statement was accepted by older Spark versions
assertQueryFailure(() -> onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='123-04-05') VALUES (5)", sparkTableName)))
.hasMessageContaining("cannot be cast to \"DATE\" because it is malformed");
onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='-0001-01-01') VALUES (6)", sparkTableName));
assertThat(onTrino().executeQuery("SELECT value, \"$partition\" FROM " + trinoTableName))
.containsOnly(List.of(
row(1, "dt=2022-04-13"),
row(2, "dt=2022-04-13"),
row(3, "dt=2022-04-13"),
row(4, "dt=+12345-06-07"),
row(6, "dt=-0001-01-01")));
// Use date_format function to avoid exception due to java.sql.Date.valueOf() with 5 digit year
assertThat(onSpark().executeQuery("SELECT value, date_format(dt, 'yyyy-MM-dd') FROM " + sparkTableName))
.containsOnly(List.of(
row(1, "2022-04-13"),
row(2, "2022-04-13"),
row(3, "2022-04-13"),
row(4, "+12345-06-07"),
row(6, "-0001-01-01")));
// Use date_format function to avoid exception due to java.sql.Date.valueOf() with 5 digit year
assertThat(onHive().executeQuery("SELECT value, date_format(dt, 'yyyy-MM-dd') FROM " + sparkTableName))
.containsOnly(List.of(
row(1, "2022-04-13"),
row(2, "2022-04-13"),
row(3, "2022-04-13"),
row(4, null),
row(6, "0002-01-03")));
// Cast to varchar so that we can compare with Spark & Hive easily
assertThat(onTrino().executeQuery("SELECT value, CAST(dt AS VARCHAR) FROM " + trinoTableName))
.containsOnly(List.of(
row(1, "2022-04-13"),
row(2, "2022-04-13"),
row(3, "2022-04-13"),
row(4, "12345-06-07"),
row(6, "-0001-01-01")));
onTrino().executeQuery("DROP TABLE " + trinoTableName);
}
@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testTextInputFormatWithParquetHiveSerDe()
throws IOException
{
String tableName = "test_text_input_format_with_parquet_hive_ser_de" + randomNameSuffix();
onHive().executeQuery("" +
"CREATE EXTERNAL TABLE " + tableName +
"(col INT) " +
"ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' " +
"STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' " +
"OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' " +
"LOCATION '/tmp/" + tableName + "'");
onSpark().executeQuery("INSERT INTO " + tableName + " VALUES(1)");
assertThat(onSpark().executeQuery("SELECT * FROM " + tableName)).containsOnly(row(1));
assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(row(1));
List<String> files = hdfsClient.listDirectory("/tmp/" + tableName + "/");
assertThat(files).hasSize(2);
assertThat(files.stream().filter(name -> !name.contains("SUCCESS")).collect(toImmutableList()).get(0)).endsWith("parquet");
List