/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.tests.product.deltalake;
import com.google.common.collect.ImmutableList;
import io.trino.tempto.BeforeMethodWithContext;
import io.trino.tempto.assertions.QueryAssert.Row;
import io.trino.testing.DataProviders;
import io.trino.testng.services.Flaky;
import io.trino.tests.product.deltalake.util.DatabricksVersion;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.List;
import java.util.Optional;
import java.util.function.Consumer;
import static io.trino.tempto.assertions.QueryAssert.Row.row;
import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure;
import static io.trino.testing.TestingNames.randomNameSuffix;
import static io.trino.tests.product.TestGroups.DELTA_LAKE_DATABRICKS;
import static io.trino.tests.product.TestGroups.DELTA_LAKE_EXCLUDE_91;
import static io.trino.tests.product.TestGroups.DELTA_LAKE_OSS;
import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.DATABRICKS_COMMUNICATION_FAILURE_ISSUE;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.DATABRICKS_COMMUNICATION_FAILURE_MATCH;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.dropDeltaTableWithRetry;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.getDatabricksRuntimeVersion;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.getTablePropertiesOnDelta;
import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.getTablePropertyOnDelta;
import static io.trino.tests.product.utils.QueryExecutors.onDelta;
import static io.trino.tests.product.utils.QueryExecutors.onTrino;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.assertj.core.api.Assertions.entry;
public class TestDeltaLakeDeleteCompatibility
extends BaseTestDeltaLakeS3Storage
{
private Optional<DatabricksVersion> databricksRuntimeVersion;
@BeforeMethodWithContext
public void determineDatabricksVersion()
{
databricksRuntimeVersion = getDatabricksRuntimeVersion();
}
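// Verify that DELETE executed through Trino reports the number of deleted rows, with and without a partition predicate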
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS}, dataProviderClass = DataProviders.class, dataProvider = "trueFalse")
public void testDeleteOnEnforcedConstraintsReturnsRowsCount(boolean partitioned)
{
String tableName = "test_delete_push_down_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(v INT, p INT)" +
"USING delta " +
(partitioned ? "PARTITIONED BY (p)" : "") +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "'");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 10), (2, 10), (11, 20), (21, 30), (22, 30)");
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (3, 10), (12, 20)");
if (partitioned) {
assertThat(onTrino().executeQuery("DELETE FROM default." + tableName + " WHERE p = 10"))
.containsOnly(row(3));
assertThat(onTrino().executeQuery("DELETE FROM default." + tableName))
.containsOnly(row(4));
}
else {
assertThat(onTrino().executeQuery("DELETE FROM default." + tableName))
.containsOnly(row(7));
}
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)).hasNoRows();
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
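// Verify that rows deleted through Trino disappear for both Trino and Delta readers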
@Test(groups = {DELTA_LAKE_DATABRICKS, PROFILE_SPECIFIC_TESTS})
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testDeleteCompatibility()
{
String tableName = "test_delete_compatibility_" + randomNameSuffix();
onDelta().executeQuery("CREATE TABLE default." + tableName + " (a int, b int)" +
" USING DELTA LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "'");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)");
onTrino().executeQuery("DELETE FROM delta.default." + tableName + " WHERE a % 2 = 0");
List<Row> expectedRows = ImmutableList.of(
row(1, 2),
row(3, 4),
row(5, 6));
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(expectedRows);
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(expectedRows);
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
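// Verify that DELETE is rejected while 'delta.appendOnly' is true and allowed again once the property is set back to false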
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeleteOnAppendOnlyWriterFeature()
{
String tableName = "test_delete_on_append_only_feature_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT, b INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "'" +
"TBLPROPERTIES ('delta.minWriterVersion'='7', 'delta.appendOnly' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1,11), (2, 12)");
assertThat(getTablePropertiesOnDelta("default", tableName))
.contains(entry("delta.feature.appendOnly", "supported"))
.contains(entry("delta.appendOnly", "true"));
assertQueryFailure(() -> onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 1"))
.hasMessageContaining("This table is configured to only allow appends");
assertQueryFailure(() -> onTrino().executeQuery("DELETE FROM delta.default." + tableName + " WHERE a = 1"))
.hasMessageContaining("Cannot modify rows from a table with 'delta.appendOnly' set to true");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(1, 11), row(2, 12));
// delta.feature.appendOnly still exists even after unsetting the property
onDelta().executeQuery("ALTER TABLE default." + tableName + " UNSET TBLPROPERTIES ('delta.feature.appendOnly')");
assertThat(getTablePropertiesOnDelta("default", tableName))
.contains(entry("delta.feature.appendOnly", "supported"))
.contains(entry("delta.appendOnly", "true"));
// Disable delta.appendOnly property
onDelta().executeQuery("ALTER TABLE default." + tableName + " SET TBLPROPERTIES ('delta.appendOnly'=false)");
assertThat(getTablePropertiesOnDelta("default", tableName))
.contains(entry("delta.feature.appendOnly", "supported"))
.contains(entry("delta.appendOnly", "false"));
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 1");
onTrino().executeQuery("DELETE FROM delta.default." + tableName + " WHERE a = 2");
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).hasNoRows();
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
// OSS Delta Lake doesn't support the TRUNCATE TABLE statement
@Test(groups = {DELTA_LAKE_DATABRICKS, PROFILE_SPECIFIC_TESTS})
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testTruncateTable()
{
String tableName = "test_truncate_table_" + randomNameSuffix();
onTrino().executeQuery("" +
"CREATE TABLE delta.default." + tableName +
"(a INT)" +
"WITH (location = 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "')");
try {
onTrino().executeQuery("INSERT INTO delta.default." + tableName + " VALUES 1, 2, 3");
onTrino().executeQuery("TRUNCATE TABLE delta.default." + tableName);
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)).hasNoRows();
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES 4, 5, 6");
onDelta().executeQuery("TRUNCATE TABLE default." + tableName);
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).hasNoRows();
}
finally {
onTrino().executeQuery("DROP TABLE delta.default." + tableName);
}
}
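// Verify that deletion vectors written by Trino on a table created with deletion_vectors_enabled are readable by both engines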
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testTrinoDeletionVectors()
{
String tableName = "test_trino_deletion_vectors_" + randomNameSuffix();
onTrino().executeQuery("" +
"CREATE TABLE delta.default." + tableName +
"(a INT)" +
"WITH (deletion_vectors_enabled = true, location = 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "')");
try {
onTrino().executeQuery("INSERT INTO delta.default." + tableName + " VALUES 1, 2");
onTrino().executeQuery("DELETE FROM delta.default." + tableName + " WHERE a = 2");
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).containsOnly(row(1));
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)).containsOnly(row(1));
assertThat(getTablePropertyOnDelta("default", tableName, "delta.enableDeletionVectors")).isEqualTo("true");
}
finally {
onTrino().executeQuery("DROP TABLE delta.default." + tableName);
}
}
// Databricks 12.1 and OSS Delta 2.4.0 added support for deletion vectors
@Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, PROFILE_SPECIFIC_TESTS}, dataProvider = "columnMappingModeDataProvider")
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testDeletionVectors(String mode)
{
String tableName = "test_deletion_vectors_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
" (a INT, b INT)" +
" USING delta " +
" LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
" TBLPROPERTIES ('delta.enableDeletionVectors' = true, 'delta.columnMapping.mode' = '" + mode + "')");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1,11), (2, 22)");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 2");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(1, 11));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(row(1, 11));
// Reinsert the deleted row and verify that the row appears correctly
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (2, 22)");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(1, 11), row(2, 22));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(row(1, 11), row(2, 22));
// Execute a DELETE statement that doesn't delete any rows
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = -1");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(1, 11), row(2, 22));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(row(1, 11), row(2, 22));
// Verify other statements
assertThat(onTrino().executeQuery("SHOW TABLES FROM delta.default"))
.contains(row(tableName));
assertThat(onTrino().executeQuery("SELECT version, operation FROM delta.default.\"" + tableName + "$history\""))
// Use the 'contains' method because newer Databricks clusters execute an OPTIMIZE statement in the background
.contains(row(0, "CREATE TABLE"), row(1, "WRITE"), row(2, "DELETE"));
assertThat(onTrino().executeQuery("SELECT column_name FROM delta.information_schema.columns WHERE table_schema = 'default' AND table_name = '" + tableName + "'"))
.contains(row("a"), row("b"));
assertThat(onTrino().executeQuery("SHOW COLUMNS FROM delta.default." + tableName))
.contains(row("a", "integer", "", ""), row("b", "integer", "", ""));
assertThat(onTrino().executeQuery("DESCRIBE delta.default." + tableName))
.contains(row("a", "integer", "", ""), row("b", "integer", "", ""));
onTrino().executeQuery("INSERT INTO delta.default." + tableName + " VALUES (3, 33)");
onTrino().executeQuery("DELETE FROM delta.default." + tableName + " WHERE a = 1");
onTrino().executeQuery("UPDATE delta.default." + tableName + " SET a = 30 WHERE b = 33");
onTrino().executeQuery("MERGE INTO delta.default." + tableName + " t USING delta.default." + tableName + " s " +
"ON (t.a = s.a) WHEN MATCHED THEN UPDATE SET b = -1");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(2, -1), row(30, -1));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(row(2, -1), row(30, -1));
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
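// Verify that deletion vectors on a partitioned table are readable by both engines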
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeletionVectorsWithPartitionedTable()
{
String tableName = "test_deletion_vectors_partitioned_table_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(id INT, part STRING)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"PARTITIONED BY (part)" +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 'part'), (2, 'part')");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE id = 1");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(2, "part"));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(row(2, "part"));
}
finally {
onDelta().executeQuery("DROP TABLE " + tableName);
}
}
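// Verify that deletion vectors work together with 'delta.randomizeFilePrefixes'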
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeletionVectorsWithRandomPrefix()
{
String tableName = "test_deletion_vectors_random_prefix_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT, b INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true, 'delta.randomizeFilePrefixes' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 11), (2, 22)");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 2");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(1, 11));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(row(1, 11));
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
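// Verify that reads and deletes keep working after 'delta.enableDeletionVectors' is switched back to false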
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDisableDeletionVectors()
{
String tableName = "test_deletion_vectors_random_prefix_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT, b INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 11), (2, 22), (3, 33)");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 2");
onDelta().executeQuery("ALTER TABLE default." + tableName + " SET TBLPROPERTIES ('delta.enableDeletionVectors' = false)");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(1, 11), row(3, 33));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(row(1, 11), row(3, 33));
// Delete rows which already existed before disabling deletion vectors
onDelta().executeQuery("DELETE FROM default." + tableName);
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.hasNoRows();
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.hasNoRows();
// Insert new rows and delete one of them after disabling deletion vectors
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (4, 44), (5, 55)");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 4");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(5, 55));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(row(5, 55));
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
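// Verify that deletion vectors remain readable when a checkpoint is written after every commit ('delta.checkpointInterval' = 1)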
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeletionVectorsWithCheckpointInterval()
{
String tableName = "test_deletion_vectors_random_prefix_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT, b INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true, 'delta.checkpointInterval' = 1)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 11), (2, 22)");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 2");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName))
.containsOnly(row(1, 11));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.containsOnly(row(1, 11));
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
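// Verify deletion vectors produced by MERGE ... WHEN MATCHED THEN DELETE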
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeletionVectorsMergeDelete()
{
String tableName = "test_deletion_vectors_merge_delete_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " SELECT explode(sequence(1, 10))");
onDelta().executeQuery("MERGE INTO default." + tableName + " t USING default." + tableName + " s " +
"ON (t.a = s.a) WHEN MATCHED AND t.a > 5 THEN DELETE");
List<Row> expected = ImmutableList.of(row(1), row(2), row(3), row(4), row(5));
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)).contains(expected);
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).contains(expected);
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
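// Verify reads when a deletion vector covers almost all rows of a file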
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeletionVectorsLargeNumbers()
{
String tableName = "test_deletion_vectors_large_numbers_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " SELECT explode(sequence(1, 10000))");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a > 1");
List<Row> expected = ImmutableList.of(row(1));
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)).contains(expected);
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).contains(expected);
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
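// Verify Change Data Feed entries produced by Trino on a table with both CDF and deletion vectors enabled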
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testChangeDataFeedWithDeletionVectors()
{
String tableName = "test_change_data_feed_with_deletion_vectors_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(col1 STRING, updated_column INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableChangeDataFeed' = true, 'delta.enableDeletionVectors' = true)");
onTrino().executeQuery("INSERT INTO delta.default." + tableName + " VALUES ('testValue1', 1), ('testValue2', 2), ('testValue3', 3)");
onTrino().executeQuery("UPDATE delta.default." + tableName + " SET updated_column = 30 WHERE col1 = 'testValue3'");
assertThat(onDelta().executeQuery("SELECT col1, updated_column, _change_type FROM table_changes('default." + tableName + "', 0)"))
.containsOnly(
row("testValue1", 1, "insert"),
row("testValue2", 2, "insert"),
row("testValue3", 3, "insert"),
row("testValue3", 3, "update_preimage"),
row("testValue3", 30, "update_postimage"));
assertThat(onTrino().executeQuery("SELECT col1, updated_column, _change_type FROM TABLE(delta.system.table_changes('default', '" + tableName + "', 0))"))
.containsOnly(
row("testValue1", 1, "insert"),
row("testValue2", 2, "insert"),
row("testValue3", 3, "insert"),
row("testValue3", 3, "update_preimage"),
row("testValue3", 30, "update_postimage"));
}
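// Verify deletion vectors attached to multiple add file entries, with and without partitioning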
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS},
dataProviderClass = DataProviders.class, dataProvider = "trueFalse")
public void testDeletionVectorsAcrossAddFile(boolean partitioned)
{
String tableName = "test_deletion_vectors_accross_add_file_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT, b INT)" +
"USING delta " +
(partitioned ? "PARTITIONED BY (a)" : "") +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1,11), (2,22)");
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (3,33), (4,44)");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 2 OR a = 4");
List<Row> expected = ImmutableList.of(row(1, 11), row(3, 33));
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)).containsOnly(expected);
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).containsOnly(expected);
// Verify behavior when the query doesn't read non-partition columns
assertThat(onTrino().executeQuery("SELECT count(*) FROM delta.default." + tableName)).containsOnly(row(2));
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
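// TRUNCATE TABLE on a table with deletion vectors is supported on Databricks only; OSS Delta Lake rejects it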
@Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, PROFILE_SPECIFIC_TESTS})
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testDeletionVectorsTruncateTable()
{
testDeletionVectorsDeleteAll(tableName -> {
if (databricksRuntimeVersion.isPresent()) {
onDelta().executeQuery("TRUNCATE TABLE default." + tableName);
}
else {
assertThatThrownBy(() -> onDelta().executeQuery("TRUNCATE TABLE default." + tableName))
.hasMessageContaining("Table does not support truncates");
throw new SkipException("OSS Delta Lake doesn't support truncating tables with deletion vectors");
}
});
}
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeletionVectorsDeleteFrom()
{
testDeletionVectorsDeleteAll(tableName -> onDelta().executeQuery("DELETE FROM default." + tableName));
}
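// Shared scenario: remove every row from a deletion-vector table and verify both engines see an empty table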
private void testDeletionVectorsDeleteAll(Consumer<String> deleteRow)
{
String tableName = "test_deletion_vectors_delete_all_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " SELECT explode(sequence(1, 1000))");
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).hasRowsCount(1000);
deleteRow.accept(tableName);
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)).hasNoRows();
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).hasNoRows();
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
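// Verify reads after OPTIMIZE rewrites files that carry deletion vectors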
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeletionVectorsOptimize()
{
String tableName = "test_deletion_vectors_optimize_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT, b INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1,11), (2,22)");
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (3,33), (4,44)");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 1 OR a = 3");
List<Row> expected = ImmutableList.of(row(2, 22), row(4, 44));
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).contains(expected);
onDelta().executeQuery("OPTIMIZE default." + tableName);
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)).contains(expected);
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)).contains(expected);
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
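// Verify behavior for deletion vectors referenced by an absolute path ('p' storageType), produced via SHALLOW CLONE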
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeletionVectorsAbsolutePath()
{
String baseTableName = "test_deletion_vectors_base_absolute_" + randomNameSuffix();
String tableName = "test_deletion_vectors_absolute_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + baseTableName +
"(a INT, b INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + baseTableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + baseTableName + " VALUES (1,11), (2,22), (3,33), (4,44)");
// Ensure that the content of the table is coalesced into a larger file
onDelta().executeQuery("OPTIMIZE default." + baseTableName);
onDelta().executeQuery("DELETE FROM default." + baseTableName + " WHERE a = 1 OR a = 3");
List<Row> expected = ImmutableList.of(row(2, 22), row(4, 44));
assertThat(onDelta().executeQuery("SELECT * FROM default." + baseTableName)).contains(expected);
assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + baseTableName)).contains(expected);
// The cloned table uses the 'p' (absolute path) storageType for its deletion vectors
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName + " SHALLOW CLONE " + baseTableName + " " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-clone-" + baseTableName + "'");
assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)).contains(expected);
assertQueryFailure(() -> onTrino().executeQuery("SELECT * FROM delta.default." + tableName))
.hasMessageContaining("Unsupported storage type for deletion vector: p");
}
finally {
dropDeltaTableWithRetry("default." + baseTableName);
dropDeltaTableWithRetry("default." + tableName);
}
}
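// Verify table_changes when rows are removed via deletion vectors; Trino's table_changes currently fails for such versions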
@Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS})
public void testDeletionVectorsWithChangeDataFeed()
{
String tableName = "test_deletion_vectors_cdf_" + randomNameSuffix();
onDelta().executeQuery("" +
"CREATE TABLE default." + tableName +
"(a INT, b INT)" +
"USING delta " +
"LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "' " +
"TBLPROPERTIES ('delta.enableDeletionVectors' = true, 'delta.enableChangeDataFeed' = true)");
try {
onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1,11), (2,22), (3,33), (4,44)");
onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a = 1 OR a = 3");
assertThat(onDelta().executeQuery(
"SELECT a, b, _change_type, _commit_version FROM table_changes('default." + tableName + "', 0)"))
.containsOnly(
row(1, 11, "insert", 1L),
row(2, 22, "insert", 1L),
row(3, 33, "insert", 1L),
row(4, 44, "insert", 1L),
row(1, 11, "delete", 2L),
row(3, 33, "delete", 2L));
// TODO Fix table_changes function failure
assertQueryFailure(() -> onTrino().executeQuery("SELECT a, b, _change_type, _commit_version FROM TABLE(delta.system.table_changes('default', '" + tableName + "', 0))"))
.hasMessageContaining("Change Data Feed is not enabled at version 2. Version contains 'remove' entries without 'cdc' entries");
}
finally {
dropDeltaTableWithRetry("default." + tableName);
}
}
@DataProvider
public Object[][] columnMappingModeDataProvider()
{
return new Object[][] {
{"none"},
{"name"},
{"id"}
};
}
}