All downloads are free. Search and download functionality uses the official Maven repository.

io.trino.tests.product.hudi.TestHudiSparkCompatibility Maven / Gradle / Ivy

There is a newer version: 464
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.tests.product.hudi;

import com.google.common.collect.ImmutableList;
import io.trino.tempto.BeforeMethodWithContext;
import io.trino.tempto.ProductTest;
import io.trino.tempto.assertions.QueryAssert;
import org.testng.annotations.Test;

import java.util.List;

import static io.trino.tempto.assertions.QueryAssert.Row.row;
import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure;
import static io.trino.testing.TestingNames.randomNameSuffix;
import static io.trino.tests.product.TestGroups.HIVE_HUDI_REDIRECTIONS;
import static io.trino.tests.product.TestGroups.HUDI;
import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS;
import static io.trino.tests.product.utils.QueryExecutors.onHudi;
import static io.trino.tests.product.utils.QueryExecutors.onTrino;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.assertj.core.api.Assertions.assertThat;

/**
 * Product tests that create Hudi tables through the {@code onHudi()} executor and verify that
 * Trino returns the same data and metadata, through the {@code hudi} catalog or, for the
 * redirection test, through the {@code hive} catalog.
 *
 * <p>Each test creates its own table, named with a random suffix and located at
 * {@code s3://<S3_BUCKET>/<tableName>}, and drops it in a {@code finally} block.
 */
public class TestHudiSparkCompatibility
        extends ProductTest
{
    private static final String COW_TABLE_TYPE = "cow";
    private static final String MOR_TABLE_TYPE = "mor";

    // Name of the S3 bucket that holds the table data; read from the environment in setUp().
    private String bucketName;

    /**
     * Reads the target bucket from the {@code S3_BUCKET} environment variable.
     *
     * @throws NullPointerException if the variable is not set
     */
    @BeforeMethodWithContext
    public void setUp()
    {
        bucketName = requireNonNull(System.getenv("S3_BUCKET"), "Environment variable not set: S3_BUCKET");
    }

    /**
     * Checks the exact {@code SHOW CREATE TABLE} output for a copy-on-write table on both engines.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testCopyOnWriteShowCreateTable()
    {
        String tableName = "test_hudi_cow_show_create_" + randomNameSuffix();

        createNonPartitionedTable(tableName, COW_TABLE_TYPE);

        try {
            assertThat((String) onTrino().executeQuery("SHOW CREATE TABLE hudi.default." + tableName).getOnlyValue())
                    .isEqualTo(format("""
                                    CREATE TABLE hudi.default.%s (
                                       _hoodie_commit_time varchar,
                                       _hoodie_commit_seqno varchar,
                                       _hoodie_record_key varchar,
                                       _hoodie_partition_path varchar,
                                       _hoodie_file_name varchar,
                                       id bigint,
                                       name varchar,
                                       price integer,
                                       ts bigint
                                    )
                                    WITH (
                                       location = 's3://%s/%s'
                                    )""",
                            tableName,
                            bucketName,
                            tableName));

            // The commit timestamp differs on every run, so it is read back and substituted into the expected DDL.
            String lastCommitTimeSync = (String) onHudi().executeQuery("show TBLPROPERTIES default." + tableName + " ('last_commit_time_sync')").project(2).getOnlyValue();
            // The closing delimiter of this text block is on its own line, so the expected text ends with a newline.
            assertThat((String) onHudi().executeQuery("SHOW CREATE TABLE default." + tableName).getOnlyValue())
                    .isEqualTo(format("""
                                    CREATE TABLE default.%s (
                                      _hoodie_commit_time STRING,
                                      _hoodie_commit_seqno STRING,
                                      _hoodie_record_key STRING,
                                      _hoodie_partition_path STRING,
                                      _hoodie_file_name STRING,
                                      id BIGINT,
                                      name STRING,
                                      price INT,
                                      ts BIGINT)
                                    USING hudi
                                    LOCATION 's3://%s/%s'
                                    TBLPROPERTIES (
                                      'last_commit_time_sync' = '%s',
                                      'preCombineField' = 'ts',
                                      'primaryKey' = 'id',
                                      'type' = 'cow')
                                    """,
                            tableName,
                            bucketName,
                            tableName,
                            lastCommitTimeSync));
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * Both engines must return the two inserted rows of a non-partitioned copy-on-write table.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testCopyOnWriteTableSelect()
    {
        String tableName = "test_hudi_cow_select_" + randomNameSuffix();

        createNonPartitionedTable(tableName, COW_TABLE_TYPE);

        List<QueryAssert.Row> expectedRows = ImmutableList.of(
                row(1, "a1"),
                row(2, "a2"));

        try {
            assertThat(onHudi().executeQuery("SELECT id, name FROM default." + tableName))
                    .containsOnly(expectedRows);
            assertThat(onTrino().executeQuery("SELECT id, name FROM hudi.default." + tableName))
                    .containsOnly(expectedRows);
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * Reads a partitioned copy-on-write table on both engines, then checks that Trino honours a
     * filter on the partition columns.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testCopyOnWritePartitionedTableSelect()
    {
        String tableName = "test_hudi_cow_partitioned_select_" + randomNameSuffix();

        createPartitionedTable(tableName, COW_TABLE_TYPE);

        List<QueryAssert.Row> allRows = ImmutableList.of(
                row(1, "a1", 1000, "2021-12-09", "10"),
                row(2, "a2", 1000, "2021-12-09", "11"));
        List<QueryAssert.Row> rowsInPartitionHour11 = ImmutableList.of(row(2, "a2", 1000));

        try {
            assertThat(onHudi().executeQuery("SELECT id, name, ts, dt, hh FROM default." + tableName))
                    .containsOnly(allRows);
            assertThat(onTrino().executeQuery("SELECT id, name, ts, dt, hh FROM hudi.default." + tableName))
                    .containsOnly(allRows);

            assertThat(onTrino().executeQuery("SELECT id, name, ts FROM hudi.default." + tableName + " WHERE dt = '2021-12-09' AND hh = '11'"))
                    .containsOnly(rowsInPartitionHour11);
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * An {@code UPDATE} issued through {@code onHudi()} on a copy-on-write table must be visible
     * to both engines.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testCopyOnWriteTableSelectAfterUpdate()
    {
        String tableName = "test_hudi_cow_select_after_update" + randomNameSuffix();

        createPartitionedTable(tableName, COW_TABLE_TYPE);

        List<QueryAssert.Row> rowsBeforeUpdate = ImmutableList.of(
                row(1, "a1"),
                row(2, "a2"));
        List<QueryAssert.Row> rowsAfterUpdate = ImmutableList.of(
                row(1, "a1_1", 1001),
                row(2, "a2", 1000));

        try {
            assertThat(onHudi().executeQuery("SELECT id, name FROM default." + tableName))
                    .containsOnly(rowsBeforeUpdate);
            assertThat(onTrino().executeQuery("SELECT id, name FROM hudi.default." + tableName))
                    .containsOnly(rowsBeforeUpdate);

            onHudi().executeQuery("UPDATE default." + tableName + " SET name = 'a1_1', ts = 1001 WHERE id = 1");
            assertThat(onHudi().executeQuery("SELECT id, name, ts FROM default." + tableName))
                    .containsOnly(rowsAfterUpdate);
            assertThat(onTrino().executeQuery("SELECT id, name, ts FROM hudi.default." + tableName))
                    .containsOnly(rowsAfterUpdate);
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * Both engines must return the two inserted rows of a non-partitioned merge-on-read table.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testMergeOnReadTableSelect()
    {
        String tableName = "test_hudi_mor_select_" + randomNameSuffix();

        createNonPartitionedTable(tableName, MOR_TABLE_TYPE);

        List<QueryAssert.Row> expectedRows = ImmutableList.of(
                row(1, "a1", 20, 1000),
                row(2, "a2", 40, 2000));

        try {
            assertThat(onHudi().executeQuery("SELECT id, name, price, ts FROM default." + tableName))
                    .containsOnly(expectedRows);
            assertThat(onTrino().executeQuery("SELECT id, name, price, ts FROM hudi.default." + tableName))
                    .containsOnly(expectedRows);
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * After an {@code UPDATE} on a merge-on-read table, {@code onHudi()} must see the new value
     * while Trino, querying the {@code _ro} table, is expected to still return the original rows.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testMergeOnReadTableSelectAfterUpdate()
    {
        String tableName = "test_hudi_mor_update" + randomNameSuffix();

        createNonPartitionedTable(tableName, MOR_TABLE_TYPE);

        List<QueryAssert.Row> rowsBeforeUpdate = ImmutableList.of(
                row(1, "a1", 20, 1000),
                row(2, "a2", 40, 2000));
        List<QueryAssert.Row> rowsAfterUpdate = ImmutableList.of(
                row(1, "a1", 20, 1000),
                row(2, "a2", 40, 2020));

        try {
            assertThat(onHudi().executeQuery("SELECT id, name, price, ts FROM default." + tableName))
                    .containsOnly(rowsBeforeUpdate);
            assertThat(onTrino().executeQuery("SELECT id, name, price, ts FROM hudi.default." + tableName))
                    .containsOnly(rowsBeforeUpdate);

            onHudi().executeQuery("UPDATE default." + tableName + " SET ts = 2020 WHERE id = 2");
            assertThat(onHudi().executeQuery("SELECT id, name, price, ts FROM default." + tableName))
                    .containsOnly(rowsAfterUpdate);
            // NOTE: MOR Snapshot queries are not supported yet.
            // "_ro" suffix to the table indicates read-optimized query.
            assertThat(onTrino().executeQuery("SELECT id, name, price, ts FROM hudi.default." + tableName + "_ro"))
                    .containsOnly(rowsBeforeUpdate);
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * Reads a partitioned merge-on-read table; Trino goes through the {@code _ro} table, with and
     * without a filter on the partition columns.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testMergeOnReadPartitionedTableSelect()
    {
        String tableName = "test_hudi_mor_partitioned_select_" + randomNameSuffix();

        createPartitionedTable(tableName, MOR_TABLE_TYPE);

        List<QueryAssert.Row> allRows = ImmutableList.of(
                row(1, "a1", 1000, "2021-12-09", "10"),
                row(2, "a2", 1000, "2021-12-09", "11"));
        List<QueryAssert.Row> rowsInPartitionHour11 = ImmutableList.of(row(2, "a2", 1000));

        try {
            assertThat(onHudi().executeQuery("SELECT id, name, ts, dt, hh FROM default." + tableName))
                    .containsOnly(allRows);
            assertThat(onTrino().executeQuery("SELECT id, name, ts, dt, hh FROM hudi.default." + tableName + "_ro"))
                    .containsOnly(allRows);

            assertThat(onTrino().executeQuery("SELECT id, name, ts FROM hudi.default." + tableName + "_ro WHERE dt = '2021-12-09' AND hh = '11'"))
                    .containsOnly(rowsInPartitionHour11);
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * With the {@code hudi.columns_to_hide} session property listing all {@code _hoodie_*}
     * columns, {@code SELECT *} in Trino must return only the user-defined columns.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testCopyOnWriteTableSelectWithSessionProperties()
    {
        String tableName = "test_hudi_cow_select_session_props" + randomNameSuffix();

        createNonPartitionedTable(tableName, COW_TABLE_TYPE);

        try {
            assertThat(onTrino().executeQuery("SELECT id, name FROM hudi.default." + tableName))
                    .containsOnly(ImmutableList.of(
                            row(1, "a1"),
                            row(2, "a2")));
            onTrino().executeQuery(
                    "SET SESSION hudi.columns_to_hide = ARRAY['_hoodie_commit_time','_hoodie_commit_seqno','_hoodie_record_key','_hoodie_partition_path','_hoodie_file_name']");
            assertThat(onTrino().executeQuery("SELECT * FROM hudi.default." + tableName))
                    .containsOnly(ImmutableList.of(
                            row(1, "a1", 20, 1000),
                            row(2, "a2", 40, 2000)));
        }
        finally {
            try {
                // The SELECT * assertion relies on the SET SESSION above still being in effect on a later
                // onTrino() call, so the override would equally be seen by whatever test runs next
                // (for example the SHOW CREATE TABLE test, which expects the _hoodie_* columns). Undo it.
                onTrino().executeQuery("RESET SESSION hudi.columns_to_hide");
            }
            finally {
                dropHudiTable(tableName);
            }
        }
    }

    /**
     * The {@code $timeline} system table of a freshly populated table must list one completed commit.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testTimelineTable()
    {
        String tableName = "test_hudi_timeline_system_table_" + randomNameSuffix();
        createNonPartitionedTable(tableName, COW_TABLE_TYPE);
        try {
            assertThat(onTrino().executeQuery(format("SELECT action, state FROM hudi.default.\"%s$timeline\"", tableName)))
                    .containsOnly(row("commit", "COMPLETED"));
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * The {@code $timeline} system table must also be reachable through the {@code hive} catalog,
     * and querying it for a table that was never created must fail with a "does not exist" error.
     */
    @Test(groups = {HIVE_HUDI_REDIRECTIONS, PROFILE_SPECIFIC_TESTS})
    public void testTimelineTableRedirect()
    {
        String tableName = "test_hudi_timeline_system_table_redirect_" + randomNameSuffix();
        String nonExistingTableName = tableName + "_non_existing";
        createNonPartitionedTable(tableName, COW_TABLE_TYPE);
        try {
            assertThat(onTrino().executeQuery(format("SELECT action, state FROM hive.default.\"%s$timeline\"", tableName)))
                    .containsOnly(row("commit", "COMPLETED"));
            assertQueryFailure(() -> onTrino().executeQuery(format("SELECT * FROM hive.default.\"%s$timeline\"", nonExistingTableName)))
                    .hasMessageMatching(".*Table 'hive.default.\"test_hudi_timeline_system_table_redirect_.*_non_existing\\$timeline\"' does not exist");
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * Creates a copy-on-write table with inline clustering enabled after every commit and checks
     * that Trino can read the rows inserted into it.
     */
    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
    public void testReadCopyOnWriteTableWithReplaceCommits()
    {
        String tableName = "test_hudi_cow_replace_commits_select_" + randomNameSuffix();

        onHudi().executeQuery("CREATE TABLE default." + tableName +
                              "(id bigint, name string, ts bigint)" +
                              "USING hudi " +
                              "TBLPROPERTIES (" +
                              " type = 'cow'," +
                              " primaryKey = 'id'," +
                              " preCombineField = 'ts'," +
                              " hoodie.clustering.inline = 'true'," +
                              " hoodie.clustering.inline.max.commits = '1')" +
                              "LOCATION 's3://" + bucketName + "/" + tableName + "'");

        try {
            onHudi().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 'a1', 1000), (2, 'a2', 2000)");
            assertThat(onTrino().executeQuery("SELECT id, name FROM hudi.default." + tableName))
                    .containsOnly(row(1, "a1"), row(2, "a2"));
        }
        finally {
            dropHudiTable(tableName);
        }
    }

    /**
     * Creates {@code default.<tableName>} with columns {@code (id, name, price, ts)} at
     * {@code s3://<bucket>/<tableName>} and inserts the rows {@code (1, 'a1', 20, 1000)} and
     * {@code (2, 'a2', 40, 2000)}.
     *
     * @param tableName unqualified table name, also used as the S3 key prefix
     * @param tableType value for the Hudi {@code type} table property ({@code cow} or {@code mor})
     */
    private void createNonPartitionedTable(String tableName, String tableType)
    {
        onHudi().executeQuery(format(
                """
                        CREATE TABLE default.%s (
                          id bigint,
                          name string,
                          price int,
                          ts bigint)
                        USING hudi
                        TBLPROPERTIES (
                          type = '%s',
                          primaryKey = 'id',
                          preCombineField = 'ts')
                        LOCATION 's3://%s/%s'""",
                tableName,
                tableType,
                bucketName,
                tableName));

        onHudi().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 'a1', 20, 1000), (2, 'a2', 40, 2000)");
    }

    /**
     * Creates {@code default.<tableName>} with columns {@code (id, name, ts, dt, hh)}, partitioned
     * by {@code (dt, hh)}, at {@code s3://<bucket>/<tableName>}, and inserts one row into each of
     * the partitions {@code dt='2021-12-09', hh='10'} and {@code dt='2021-12-09', hh='11'}.
     *
     * @param tableName unqualified table name, also used as the S3 key prefix
     * @param tableType value for the Hudi {@code type} table property ({@code cow} or {@code mor})
     */
    private void createPartitionedTable(String tableName, String tableType)
    {
        onHudi().executeQuery(format(
                """
                        CREATE TABLE default.%s (
                          id bigint,
                          name string,
                          ts bigint,
                          dt string,
                          hh string)
                        USING hudi
                        TBLPROPERTIES (
                          type = '%s',
                          primaryKey = 'id',
                          preCombineField = 'ts')
                        PARTITIONED BY (dt, hh)
                        LOCATION 's3://%s/%s'""",
                tableName,
                tableType,
                bucketName,
                tableName));

        // The first insert takes the partition values from the selected row; the second names them in the PARTITION clause.
        onHudi().executeQuery("INSERT INTO default." + tableName + " PARTITION (dt, hh) SELECT 1 AS id, 'a1' AS name, 1000 AS ts, '2021-12-09' AS dt, '10' AS hh");
        onHudi().executeQuery("INSERT INTO default." + tableName + " PARTITION (dt = '2021-12-09', hh='11') SELECT 2, 'a2', 1000");
    }

    /**
     * Drops {@code default.<tableName>} through the {@code onHudi()} executor.
     *
     * @param tableName unqualified name of the table to drop
     */
    private static void dropHudiTable(String tableName)
    {
        onHudi().executeQuery("DROP TABLE default." + tableName);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy