// io.trino.tests.product.hive.TestHivePartitionProcedures Maven / Gradle / Ivy
// The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.tests.product.hive;
import com.google.inject.Inject;
import io.trino.tempto.ProductTest;
import io.trino.tempto.fulfillment.table.hive.HiveDataSource;
import io.trino.tempto.hadoop.hdfs.HdfsClient;
import io.trino.tempto.internal.hadoop.hdfs.HdfsDataSourceWriter;
import io.trino.tempto.query.QueryResult;
import io.trino.testng.services.Flaky;
import org.testng.annotations.Test;
import java.util.Set;
import java.util.stream.Collectors;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure;
import static io.trino.tempto.fulfillment.table.hive.InlineDataSource.createResourceDataSource;
import static io.trino.tests.product.TestGroups.SMOKE;
import static io.trino.tests.product.hive.util.TableLocationUtils.getTablePath;
import static io.trino.tests.product.utils.HadoopTestUtils.RETRYABLE_FAILURES_ISSUES;
import static io.trino.tests.product.utils.HadoopTestUtils.RETRYABLE_FAILURES_MATCH;
import static io.trino.tests.product.utils.QueryExecutors.onTrino;
import static java.lang.String.format;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Product tests for the {@code system.register_partition} and
 * {@code system.unregister_partition} procedures, executed through {@code onTrino()}
 * against tables in the {@code default} schema.
 *
 * <p>Each test (re)creates the tables it needs via the {@code create*} helpers, which
 * drop any pre-existing table or view of the same name first.
 */
public class TestHivePartitionProcedures
        extends ProductTest
{
    private static final String OUTSIDE_TABLES_DIRECTORY_PATH = "/user/hive/dangling";
    private static final String FIRST_TABLE = "first_table";
    private static final String SECOND_TABLE = "second_table";
    private static final String VIEW_TABLE = "view_table";

    @Inject
    private HdfsClient hdfsClient;

    @Inject
    private HdfsDataSourceWriter hdfsDataSourceWriter;

    /**
     * Unregistering a partition removes its rows from query results but must leave
     * the partition directory in place on HDFS.
     */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testUnregisterPartition()
    {
        createPartitionedTable(FIRST_TABLE);

        assertThat(getTableCount(FIRST_TABLE)).isEqualTo(3L);
        assertThat(getPartitionValues(FIRST_TABLE)).containsOnly("a", "b", "c");

        dropPartition(FIRST_TABLE, "col", "a");

        assertThat(getTableCount(FIRST_TABLE)).isEqualTo(2L);
        assertThat(getPartitionValues(FIRST_TABLE)).containsOnly("b", "c");

        // should not drop data
        assertThat(hdfsClient.exist(getTablePath(FIRST_TABLE, 1) + "/col=a/")).isTrue();
    }

    /** Unregistering a partition on a view must fail with a "Table is a view" error. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testUnregisterViewTableShouldFail()
    {
        createPartitionedTable(FIRST_TABLE);
        createView(VIEW_TABLE, FIRST_TABLE);

        assertQueryFailure(() -> dropPartition(VIEW_TABLE, "col", "a"))
                .hasMessageContaining("Table is a view: default." + VIEW_TABLE);
    }

    /** Unregistering a partition on a table that does not exist must fail. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testUnregisterMissingTableShouldFail()
    {
        createPartitionedTable(FIRST_TABLE);

        assertQueryFailure(() -> dropPartition("missing_table", "col", "f"))
                .hasMessageContaining("Table 'default.missing_table' not found");
    }

    /** Unregistering a partition on an unpartitioned table must fail. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testUnregisterUnpartitionedTableShouldFail()
    {
        createUnpartitionedTable(SECOND_TABLE);

        assertQueryFailure(() -> dropPartition(SECOND_TABLE, "col", "a"))
                .hasMessageContaining("Table is not partitioned: default." + SECOND_TABLE);
    }

    /** Unregistering with a column name that is not the table's partition column must fail. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testUnregisterInvalidPartitionColumnsShouldFail()
    {
        createPartitionedTable(FIRST_TABLE);

        assertQueryFailure(() -> dropPartition(FIRST_TABLE, "not_existing_partition_col", "a"))
                .hasMessageContaining("Provided partition column names do not match actual partition column names: [col]");
    }

    /** Unregistering a partition value that was never registered must fail. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testUnregisterMissingPartitionShouldFail()
    {
        createPartitionedTable(FIRST_TABLE);

        assertQueryFailure(() -> dropPartition(FIRST_TABLE, "col", "f"))
                .hasMessageContaining("Partition 'col=f' does not exist");
    }

    /** Registering a partition on a table that does not exist must fail. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testRegisterPartitionMissingTableShouldFail()
    {
        assertQueryFailure(() -> addPartition("missing_table", "col", "f", "/"))
                .hasMessageContaining("Table 'default.missing_table' not found");
    }

    /** Registering a partition on an unpartitioned table must fail. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testRegisterUnpartitionedTableShouldFail()
    {
        createUnpartitionedTable(SECOND_TABLE);

        assertQueryFailure(() -> addPartition(SECOND_TABLE, "col", "a", "/"))
                .hasMessageContaining("Table is not partitioned: default." + SECOND_TABLE);
    }

    /** Registering a partition on a view must fail with a "Table is a view" error. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testRegisterViewTableShouldFail()
    {
        createPartitionedTable(FIRST_TABLE);
        createView(VIEW_TABLE, FIRST_TABLE);

        assertQueryFailure(() -> addPartition(VIEW_TABLE, "col", "a", "/"))
                .hasMessageContaining("Table is a view: default." + VIEW_TABLE);
    }

    /** Registering a partition value that is already registered must fail. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testRegisterPartitionCollisionShouldFail()
    {
        createPartitionedTable(FIRST_TABLE);

        assertQueryFailure(() -> addPartition(FIRST_TABLE, "col", "a", "/"))
                .hasMessageContaining("Partition [col=a] is already registered");
    }

    /** Registering with a column name that is not the table's partition column must fail. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testRegisterPartitionInvalidPartitionColumnsShouldFail()
    {
        createPartitionedTable(FIRST_TABLE);

        assertQueryFailure(() -> addPartition(FIRST_TABLE, "not_existing_partition_col", "a", "/"))
                .hasMessageContaining("Provided partition column names do not match actual partition column names: [col]");
    }

    /** Registering a partition whose location does not exist must fail. */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testRegisterPartitionInvalidLocationShouldFail()
    {
        createPartitionedTable(FIRST_TABLE);

        assertQueryFailure(() -> addPartition(FIRST_TABLE, "col", "f", "/some/non/existing/path"))
                .hasMessageContaining("Partition location does not exist: /some/non/existing/path");
    }

    /**
     * A previously unregistered partition can be registered again without passing an
     * explicit {@code location} argument.
     */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testRegisterPartitionWithDefaultPartitionLocation()
    {
        createPartitionedTable(FIRST_TABLE);
        dropPartition(FIRST_TABLE, "col", "a");
        dropPartition(FIRST_TABLE, "col", "c");

        assertThat(getTableCount(FIRST_TABLE)).isEqualTo(1L);
        assertThat(getPartitionValues(FIRST_TABLE)).containsOnly("b");

        // Re-register partition using its default location
        addPartition(FIRST_TABLE, "col", "c");

        assertThat(getTableCount(FIRST_TABLE)).isEqualTo(2L);
        assertThat(getPartitionValues(FIRST_TABLE)).containsOnly("b", "c");
    }

    /**
     * A partition directory written under one table can be registered on another table,
     * and then unregistered from the table that originally owned it.
     */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testRegisterPartition()
    {
        createPartitionedTable(FIRST_TABLE);
        createPartitionedTable(SECOND_TABLE);

        assertThat(getPartitionValues(FIRST_TABLE)).containsOnly("a", "b", "c");

        onTrino().executeQuery(format("INSERT INTO %s (val, col) VALUES (10, 'f')", SECOND_TABLE));
        assertThat(getPartitionValues(SECOND_TABLE)).containsOnly("a", "b", "c", "f");

        // Move partition f from SECOND_TABLE to FIRST_TABLE
        addPartition(FIRST_TABLE, "col", "f", getTablePath(SECOND_TABLE, 1) + "/col=f");
        dropPartition(SECOND_TABLE, "col", "f");

        assertThat(getPartitionValues(SECOND_TABLE)).containsOnly("a", "b", "c");
        assertThat(getPartitionValues(FIRST_TABLE)).containsOnly("a", "b", "c", "f");
    }

    /**
     * A directory outside any table location can be registered as a partition; its data
     * becomes visible through the table and disappears again once unregistered.
     */
    @Test(groups = SMOKE)
    @Flaky(issue = RETRYABLE_FAILURES_ISSUES, match = RETRYABLE_FAILURES_MATCH)
    public void testRegisterPartitionFromAnyLocation()
    {
        createPartitionedTable(FIRST_TABLE);
        createDanglingLocationWithData(OUTSIDE_TABLES_DIRECTORY_PATH, "dangling");

        assertThat(getPartitionValues(FIRST_TABLE)).containsOnly("a", "b", "c");

        addPartition(FIRST_TABLE, "col", "f", OUTSIDE_TABLES_DIRECTORY_PATH);

        assertThat(getPartitionValues(FIRST_TABLE)).containsOnly("a", "b", "c", "f");
        // 42 is expected to come from the dangling location's data file
        // (presumably its only row -- confirm against the resource).
        assertThat(getValues(FIRST_TABLE)).containsOnly(1, 2, 3, 42);

        dropPartition(FIRST_TABLE, "col", "f");

        assertThat(getPartitionValues(FIRST_TABLE)).containsOnly("a", "b", "c");
        assertThat(getValues(FIRST_TABLE)).containsOnly(1, 2, 3);
    }

    /**
     * Calls {@code system.unregister_partition} for a single-column partition in the
     * {@code default} schema.
     *
     * @param tableName table (or view) name passed as {@code table_name}
     * @param partitionCol the single entry of {@code partition_columns}
     * @param partition the single entry of {@code partition_values}
     * @return the result of the {@code CALL} statement
     */
    private QueryResult dropPartition(String tableName, String partitionCol, String partition)
    {
        return onTrino().executeQuery(format("CALL system.unregister_partition(\n" +
                        " schema_name => '%s',\n" +
                        " table_name => '%s',\n" +
                        " partition_columns => ARRAY['%s'],\n" +
                        " partition_values => ARRAY['%s'])",
                "default", tableName, partitionCol, partition));
    }

    /**
     * Calls {@code system.register_partition} with an explicit {@code location} for a
     * single-column partition in the {@code default} schema.
     *
     * @param tableName table (or view) name passed as {@code table_name}
     * @param partitionCol the single entry of {@code partition_columns}
     * @param partition the single entry of {@code partition_values}
     * @param location value passed as the {@code location} argument
     * @return the result of the {@code CALL} statement
     */
    private QueryResult addPartition(String tableName, String partitionCol, String partition, String location)
    {
        return onTrino().executeQuery(format("CALL system.register_partition(\n" +
                        " schema_name => '%s',\n" +
                        " table_name => '%s',\n" +
                        " partition_columns => ARRAY['%s'],\n" +
                        " partition_values => ARRAY['%s'],\n" +
                        " location => '%s')",
                "default", tableName, partitionCol, partition, location));
    }

    /**
     * Calls {@code system.register_partition} without a {@code location} argument for a
     * single-column partition in the {@code default} schema.
     *
     * @param tableName table name passed as {@code table_name}
     * @param partitionCol the single entry of {@code partition_columns}
     * @param partition the single entry of {@code partition_values}
     * @return the result of the {@code CALL} statement
     */
    private QueryResult addPartition(String tableName, String partitionCol, String partition)
    {
        return onTrino().executeQuery(format("CALL system.register_partition(\n" +
                        " schema_name => '%s',\n" +
                        " table_name => '%s',\n" +
                        " partition_columns => ARRAY['%s'],\n" +
                        " partition_values => ARRAY['%s'])",
                "default", tableName, partitionCol, partition));
    }

    /**
     * Creates {@code path} on HDFS and asks the data source writer to place the
     * {@code single_int_column} text-file resource there.
     *
     * @param path HDFS directory to create and populate
     * @param tableName name handed to the resource data source
     */
    private void createDanglingLocationWithData(String path, String tableName)
    {
        hdfsClient.createDirectory(path);
        HiveDataSource dataSource = createResourceDataSource(tableName, "io/trino/tests/product/hive/data/single_int_column/data.textfile");
        hdfsDataSourceWriter.ensureDataOnHdfs(path, dataSource);
    }

    /**
     * Recreates {@code tableName} as a TEXTFILE table partitioned by {@code col} and
     * inserts rows {@code (1, 'a')}, {@code (2, 'b')}, {@code (3, 'c')}.
     */
    private static void createPartitionedTable(String tableName)
    {
        onTrino().executeQuery("DROP TABLE IF EXISTS " + tableName);
        onTrino().executeQuery("CREATE TABLE " + tableName + " (val int, col varchar) WITH (format = 'TEXTFILE', partitioned_by = ARRAY['col'])");
        onTrino().executeQuery("INSERT INTO " + tableName + " VALUES (1, 'a'), (2, 'b'), (3, 'c')");
    }

    /** Recreates {@code viewName} as a view selecting {@code val, col} from {@code tableName}. */
    private static void createView(String viewName, String tableName)
    {
        onTrino().executeQuery("DROP VIEW IF EXISTS " + viewName);
        onTrino().executeQuery(format("CREATE VIEW %s AS SELECT val, col FROM %s", viewName, tableName));
    }

    /**
     * Recreates {@code tableName} as an unpartitioned TEXTFILE table and inserts rows
     * {@code (1, 'a')}, {@code (2, 'b')}, {@code (3, 'c')}.
     */
    private static void createUnpartitionedTable(String tableName)
    {
        onTrino().executeQuery("DROP TABLE IF EXISTS " + tableName);
        onTrino().executeQuery("CREATE TABLE " + tableName + " (val int, col varchar) WITH (format = 'TEXTFILE')");
        onTrino().executeQuery("INSERT INTO " + tableName + " VALUES (1, 'a'), (2, 'b'), (3, 'c')");
    }

    /** Returns the result of {@code SELECT count(*)} on {@code tableName}. */
    private Long getTableCount(String tableName)
    {
        return (Long) onTrino().executeQuery("SELECT count(*) FROM " + tableName).getOnlyValue();
    }

    /**
     * Returns the distinct values of the {@code col} column of {@code tableName}.
     * The tests use this as the set of visible partition values.
     */
    private Set<String> getPartitionValues(String tableName)
    {
        return onTrino().executeQuery("SELECT col FROM " + tableName).rows().stream()
                .map(row -> row.get(0))
                .map(String.class::cast)
                .collect(Collectors.toSet());
    }

    /** Returns the distinct values of the {@code val} column of {@code tableName}. */
    private Set<Integer> getValues(String tableName)
    {
        return onTrino().executeQuery("SELECT val FROM " + tableName).column(1).stream()
                .map(Integer.class::cast)
                .collect(toImmutableSet());
    }
}