
io.trino.tests.product.hive.TestHiveViews Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.tests.product.hive;
import com.google.common.collect.ImmutableList;
import io.trino.tempto.Requires;
import io.trino.tempto.fulfillment.table.hive.tpch.ImmutableTpchTablesRequirements.ImmutableNationTable;
import io.trino.tempto.fulfillment.table.hive.tpch.ImmutableTpchTablesRequirements.ImmutableOrdersTable;
import org.assertj.core.api.Assertions;
import org.testng.annotations.Test;
import java.math.BigDecimal;
import static io.trino.tempto.assertions.QueryAssert.Row.row;
import static io.trino.tempto.assertions.QueryAssert.assertThat;
import static io.trino.tests.product.TestGroups.HIVE_VIEWS;
import static io.trino.tests.product.utils.QueryExecutors.onHive;
import static io.trino.tests.product.utils.QueryExecutors.onTrino;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
@Requires({
ImmutableNationTable.class,
ImmutableOrdersTable.class,
})
public class TestHiveViews
extends AbstractTestHiveViews
{
@Test(groups = HIVE_VIEWS)
public void testFailingHiveViewsWithMetadataListing()
{
setupBrokenView();
testFailingHiveViewsWithInformationSchema();
testFailingHiveViewsWithSystemJdbc();
// cleanup
onHive().executeQuery("DROP SCHEMA IF EXISTS test_list_failing_views CASCADE");
}
private void testFailingHiveViewsWithInformationSchema()
{
// The expected behavior is different across hive versions. For hive 3, the call "getTableNamesByType" is
// used in ThriftHiveMetastore#getAllViews. For older versions, the fallback to doGetTablesWithParameter
// is used, so Trino's information_schema.views table does not include translated Hive views.
String withSchemaFilter = "SELECT table_name FROM information_schema.views WHERE table_schema = 'test_list_failing_views'";
String withNoFilter = "SELECT table_name FROM information_schema.views";
if (getHiveVersionMajor() == 3) {
assertThat(onTrino().executeQuery(withSchemaFilter)).containsOnly(row("correct_view"));
assertThat(onTrino().executeQuery(withNoFilter)).contains(row("correct_view"));
}
else {
assertThat(onTrino().executeQuery(withSchemaFilter)).hasNoRows();
Assertions.assertThat(onTrino().executeQuery(withNoFilter).rows()).doesNotContain(ImmutableList.of("correct_view"));
}
// Queries with filters on table_schema and table_name are optimized to only fetch the specified table and uses
// a different API. so the Hive version does not matter here.
assertThat(onTrino().executeQuery("SELECT table_name FROM information_schema.views WHERE table_schema = 'test_list_failing_views' AND table_name = 'correct_view'"))
.containsOnly(row("correct_view"));
// Listing fails when metadata for the problematic view is queried specifically
assertThatThrownBy(() -> onTrino().executeQuery("SELECT table_name FROM information_schema.views WHERE table_schema = 'test_list_failing_views' AND table_name = 'failing_view'"))
.hasMessageContaining("Failed to translate Hive view 'test_list_failing_views.failing_view'");
// Queries on information_schema.columns also trigger ConnectorMetadata#getViews. Columns from failing_view are
// listed too since HiveMetadata#listTableColumns does not ignore views.
assertThat(onTrino().executeQuery("SELECT table_name, column_name FROM information_schema.columns WHERE table_schema = 'test_list_failing_views'"))
.containsOnly(
row("correct_view", "n_nationkey"),
row("correct_view", "n_name"),
row("correct_view", "n_regionkey"),
row("correct_view", "n_comment"),
row("failing_view", "col0"));
assertThatThrownBy(() -> onTrino().executeQuery("SELECT * FROM information_schema.columns WHERE table_schema = 'test_list_failing_views' AND table_name = 'failing_view'"))
.hasMessageContaining("Failed to translate Hive view 'test_list_failing_views.failing_view'");
}
private void testFailingHiveViewsWithSystemJdbc()
{
// The expected behavior is different across hive versions. For hive 3, the call "getTableNamesByType" is
// used in ThriftHiveMetastore#getAllViews. For older versions, the fallback to doGetTablesWithParameter
// is used, so Trino's system.jdbc.tables table does not include translated Hive views.
String withSchemaFilter = "SELECT table_name FROM system.jdbc.tables WHERE " +
"table_cat = 'hive' AND " +
"table_schem = 'test_list_failing_views' AND " +
"table_type = 'VIEW'";
String withNoFilter = "SELECT table_name FROM system.jdbc.tables WHERE table_cat = 'hive' AND table_type = 'VIEW'";
if (getHiveVersionMajor() == 3) {
assertThat(onTrino().executeQuery(withSchemaFilter)).containsOnly(row("correct_view"), row("failing_view"));
assertThat(onTrino().executeQuery(withNoFilter)).contains(row("correct_view"), row("failing_view"));
}
else {
assertThat(onTrino().executeQuery(withSchemaFilter)).hasNoRows();
Assertions.assertThat(onTrino().executeQuery(withNoFilter).rows()).doesNotContain(ImmutableList.of("correct_view"));
}
// Queries with filters on table_schema and table_name are optimized to only fetch the specified table and uses
// a different API. so the Hive version does not matter here.
assertThat(onTrino().executeQuery(
"SELECT table_name FROM system.jdbc.tables WHERE " +
"table_cat = 'hive' AND " +
"table_schem = 'test_list_failing_views' AND " +
"table_name = 'correct_view'"))
.containsOnly(row("correct_view"));
// Listing fails when metadata for the problematic view is queried specifically
assertThatThrownBy(() -> onTrino().executeQuery(
"SELECT table_name FROM system.jdbc.tables WHERE " +
"table_cat = 'hive' AND " +
"table_schem = 'test_list_failing_views' AND " +
"table_name = 'failing_view'"))
.hasMessageContaining("Failed to translate Hive view 'test_list_failing_views.failing_view'");
// Queries on system.jdbc.columns also trigger ConnectorMetadata#getViews. Columns from failing_view are
// listed too since HiveMetadata#listTableColumns does not ignore views.
assertThat(onTrino().executeQuery("SELECT table_name, column_name FROM system.jdbc.columns WHERE table_cat = 'hive' AND table_schem = 'test_list_failing_views'"))
.containsOnly(
row("correct_view", "n_nationkey"),
row("correct_view", "n_name"),
row("correct_view", "n_regionkey"),
row("correct_view", "n_comment"),
row("failing_view", "col0"));
assertThatThrownBy(() -> onTrino().executeQuery("SELECT * FROM system.jdbc.columns WHERE table_cat = 'hive' AND table_schem = 'test_list_failing_views' AND table_name = 'failing_view'"))
.hasMessageContaining("Failed to translate Hive view 'test_list_failing_views.failing_view'");
}
private static void setupBrokenView()
{
onHive().executeQuery("DROP SCHEMA IF EXISTS test_list_failing_views CASCADE");
onHive().executeQuery("CREATE SCHEMA test_list_failing_views");
onHive().executeQuery("CREATE VIEW test_list_failing_views.correct_view AS SELECT * FROM nation limit 5");
// Create a view for which the translation is guaranteed to fail
onTrino().executeQuery("CREATE TABLE test_list_failing_views.table_dropped (col0 BIGINT)");
onHive().executeQuery("CREATE VIEW test_list_failing_views.failing_view AS SELECT * FROM test_list_failing_views.table_dropped");
onTrino().executeQuery("DROP TABLE test_list_failing_views.table_dropped");
}
@Test(groups = HIVE_VIEWS)
public void testLateralViewExplode()
{
onTrino().executeQuery("DROP TABLE IF EXISTS pageAds");
onTrino().executeQuery("CREATE TABLE pageAds(pageid, adid_list) WITH (format='TEXTFILE') AS " +
"VALUES " +
" (VARCHAR 'two', ARRAY[11, 22]), " +
" ('nothing', NULL), " +
" ('zero', ARRAY[]), " +
" ('one', ARRAY[42])");
onHive().executeQuery("DROP VIEW IF EXISTS hive_lateral_view");
onHive().executeQuery("CREATE VIEW hive_lateral_view as SELECT pageid, adid FROM pageAds LATERAL VIEW explode(adid_list) adTable AS adid");
assertViewQuery(
"SELECT * FROM hive_lateral_view",
queryAssert -> queryAssert.containsOnly(
row("two", 11),
row("two", 22),
row("one", 42)));
onHive().executeQuery("DROP VIEW IF EXISTS hive_lateral_view_outer_explode");
onHive().executeQuery("CREATE VIEW hive_lateral_view_outer_explode as " +
"SELECT pageid, adid FROM pageAds LATERAL VIEW OUTER explode(adid_list) adTable AS adid");
assertViewQuery(
"SELECT * FROM hive_lateral_view_outer_explode",
queryAssert -> queryAssert.containsOnly(
row("two", 11),
row("two", 22),
row("one", 42),
row("nothing", null),
row("zero", null)));
}
@Test(groups = HIVE_VIEWS)
public void testLateralViewExplodeArrayOfStructs()
{
onTrino().executeQuery("DROP TABLE IF EXISTS pageAdsStructs");
onTrino().executeQuery("CREATE TABLE pageAdsStructs(pageid VARCHAR, adid_list ARRAY(ROW(a INTEGER, b INTEGER))) WITH (format='TEXTFILE')");
onTrino().executeQuery("INSERT INTO pageAdsStructs " +
"VALUES " +
" ('two', ARRAY[ROW(11, 12), ROW(13, 14)]), " +
" ('nothing', NULL), " +
" ('zero', ARRAY[]), " +
" ('one', ARRAY[ROW(42, 43)])");
onHive().executeQuery("DROP VIEW IF EXISTS hive_lateral_view_structs");
onHive().executeQuery("CREATE VIEW hive_lateral_view_structs as SELECT pageid, adid FROM pageAdsStructs LATERAL VIEW explode(adid_list) adTable AS adid");
assertViewQuery(
"SELECT pageid, adid.a, adid.b FROM hive_lateral_view_structs",
queryAssert -> queryAssert.containsOnly(
row("two", 11, 12),
row("two", 13, 14),
row("one", 42, 43)));
onHive().executeQuery("DROP VIEW IF EXISTS hive_lateral_view_structs_outer_explode");
onHive().executeQuery("CREATE VIEW hive_lateral_view_structs_outer_explode as " +
"SELECT pageid, adid FROM pageAdsStructs LATERAL VIEW OUTER explode(adid_list) adTable AS adid");
assertViewQuery(
"SELECT pageid, adid.a, adid.b FROM hive_lateral_view_structs_outer_explode",
queryAssert -> queryAssert.containsOnly(
row("two", 11, 12),
row("two", 13, 14),
row("one", 42, 43),
row("nothing", null, null),
row("zero", null, null)));
}
@Test(groups = HIVE_VIEWS)
public void testLateralViewJsonTupleAs()
{
onTrino().executeQuery("DROP TABLE IF EXISTS test_json_tuple_table");
onTrino().executeQuery("" +
"CREATE TABLE test_json_tuple_table WITH (format='TEXTFILE') AS " +
"SELECT 3 id, CAST('{\"user_id\": 1000, \"first.name\": \"Mateusz\", \"Last Name\": \"Gajewski\", \".NET\": true, \"aNull\": null}' AS varchar) jsonstr");
onHive().executeQuery("DROP VIEW IF EXISTS test_json_tuple_view");
onHive().executeQuery("CREATE VIEW test_json_tuple_view AS " +
"SELECT `t`.`id`, `x`.`a`, `x`.`b`, `x`.`c`, `x`.`d`, `x`.`e`, `x`.`f` FROM test_json_tuple_table AS `t` " +
"LATERAL VIEW json_tuple(`t`.`jsonstr`, \"first.name\", \"Last Name\", '.NET', \"user_id\", \"aNull\", \"nonexistentField\") `x` AS `a`, `b`, `c`, `d`, `e`, `f`");
assertViewQuery(
"SELECT * FROM test_json_tuple_view",
queryAssert -> queryAssert.containsOnly(row(3, "Mateusz", "Gajewski", "true", "1000", null, null)));
}
@Test(groups = HIVE_VIEWS)
public void testFromUtcTimestamp()
{
onTrino().executeQuery("DROP TABLE IF EXISTS test_from_utc_timestamp_source");
onHive().executeQuery("CREATE TABLE test_from_utc_timestamp_source (" +
" source_tinyint tinyint, " +
" source_smallint smallint, " +
" source_integer int, " +
" source_bigint bigint, " +
" source_float float, " +
" source_double double, " +
" source_decimal_three decimal(10,3), " +
" source_decimal_zero decimal(10,0), " +
" source_timestamp timestamp, " +
" source_date date" +
")");
// insert via Trino as we noticed problems with creating test table in Hive using CTAS at one go for some Hive distributions
onTrino().executeQuery("INSERT INTO test_from_utc_timestamp_source VALUES ( " +
" 123, " +
" 10123, " +
" 259200123, " +
" 2592000123, " +
" 2592000.0, " +
" 2592000.123, " +
" 2592000.123," +
" 2592000," +
" timestamp '1970-01-30 16:00:00.000', " +
" date '1970-01-30'" +
")");
onHive().executeQuery("DROP VIEW IF EXISTS test_from_utc_timestamp_view");
onHive().executeQuery("CREATE VIEW " +
"test_from_utc_timestamp_view " +
"AS SELECT " +
// TODO(https://github.com/trinodb/trino/issues/8853) add testcases with 3-letter tz names (like PST) when we have $canonicalize_hive_timezone_id logic in place
" CAST(from_utc_timestamp(source_tinyint, 'America/Los_Angeles') AS STRING) ts_tinyint, " +
" CAST(from_utc_timestamp(source_smallint, 'America/Los_Angeles') AS STRING) ts_smallint, " +
" CAST(from_utc_timestamp(source_integer, 'America/Los_Angeles') AS STRING) ts_integer, " +
" CAST(from_utc_timestamp(source_bigint, 'America/Los_Angeles') AS STRING) ts_bigint, " +
" CAST(from_utc_timestamp(source_float, 'America/Los_Angeles') AS STRING) ts_float, " +
" CAST(from_utc_timestamp(source_double, 'America/Los_Angeles') AS STRING) ts_double, " +
" CAST(from_utc_timestamp(source_decimal_three, 'America/Los_Angeles') AS STRING) ts_decimal_three, " +
" CAST(from_utc_timestamp(source_decimal_zero, 'America/Los_Angeles') AS STRING) ts_decimal_zero, " +
" CAST(from_utc_timestamp(source_timestamp, 'America/Los_Angeles') AS STRING) ts_timestamp, " +
" CAST(from_utc_timestamp(source_date, 'America/Los_Angeles') AS STRING) ts_date " +
"FROM test_from_utc_timestamp_source");
// check result on Trino
assertThat(onTrino().executeQuery("SELECT * FROM test_from_utc_timestamp_view"))
.containsOnly(row(
"1969-12-31 16:00:00.123",
"1969-12-31 16:00:10.123",
"1970-01-03 16:00:00.123",
"1970-01-30 16:00:00.123",
"1970-01-30 16:00:00.000",
"1970-01-30 16:00:00.123",
"1970-01-30 16:00:00.123",
"1970-01-30 16:00:00.000",
"1970-01-30 08:00:00.000",
"1970-01-29 16:00:00.000"));
// check result on Hive
if (isObsoleteFromUtcTimestampSemantics()) {
// For older hive version we expect different results on Hive side; as from_utc_timestamp semantics changed over time.
// Currently view transformation logic always follows new semantics.
// Leaving Hive assertions as documentation.
assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_view"))
.containsOnly(row(
"1969-12-31 21:30:00.123",
"1969-12-31 21:30:10.123",
"1970-01-03 21:30:00.123",
"1970-01-30 21:30:00.123",
"1970-01-30 21:30:00",
"1970-01-30 21:30:00.123",
"1970-01-30 21:30:00.123",
"1970-01-30 21:30:00",
"1970-01-30 08:00:00",
"1970-01-29 16:00:00"));
}
else {
assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_view"))
.containsOnly(row(
"1969-12-31 16:00:00.123",
"1969-12-31 16:00:10.123",
"1970-01-03 16:00:00.123",
"1970-01-30 16:00:00.123",
"1970-01-30 16:00:00",
"1970-01-30 16:00:00.123",
"1970-01-30 16:00:00.123",
"1970-01-30 16:00:00",
"1970-01-30 08:00:00",
"1970-01-29 16:00:00"));
}
}
@Test(groups = HIVE_VIEWS)
public void testFromUtcTimestampCornerCases()
{
onTrino().executeQuery("DROP TABLE IF EXISTS test_from_utc_timestamp_corner_cases_source");
onTrino().executeQuery("CREATE TABLE test_from_utc_timestamp_corner_cases_source AS SELECT * FROM (VALUES " +
" CAST(-5000000000001 AS BIGINT)," +
" CAST(-1000000000001 AS BIGINT)," +
" -1," +
" 1," +
" 5000000000001" +
")" +
"AS source(source_bigint)");
onHive().executeQuery("DROP VIEW IF EXISTS test_from_utc_timestamp_corner_cases_view");
onHive().executeQuery("CREATE VIEW " +
"test_from_utc_timestamp_corner_cases_view " +
"AS SELECT " +
" CAST(from_utc_timestamp(source_bigint, 'America/Los_Angeles') as STRING) ts_bigint " +
"FROM test_from_utc_timestamp_corner_cases_source");
// check result on Trino
assertThat(onTrino().executeQuery("SELECT * FROM test_from_utc_timestamp_corner_cases_view"))
.containsOnly(
row("1811-07-23 07:13:41.999"),
row("1938-04-24 14:13:19.999"),
row("1969-12-31 15:59:59.999"),
row("1969-12-31 16:00:00.001"),
row("2128-06-11 01:53:20.001"));
// check result on Hive
if (isObsoleteFromUtcTimestampSemantics()) {
// For older hive version we expect different results on Hive side; as from_utc_timestamp semantics changed over time.
// Currently view transformation logic always follows new semantics.
// Leaving Hive assertions as documentation.
assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_corner_cases_view"))
.containsOnly(
row("1811-07-23 12:51:39.999"), // ???
row("1938-04-24 19:43:19.999"),
row("1969-12-31 21:29:59.999"),
row("1969-12-31 21:30:00.001"),
row("2128-06-11 07:38:20.001"));
}
else {
assertThat(onHive().executeQuery("SELECT * FROM test_from_utc_timestamp_corner_cases_view"))
.containsOnly(
row("1811-07-23 07:13:41.999"),
row("1938-04-24 14:13:19.999"),
row("1969-12-31 15:59:59.999"),
row("1969-12-31 16:00:00.001"),
row("2128-06-11 01:53:20.001"));
}
}
private boolean isObsoleteFromUtcTimestampSemantics()
{
// It appears from_utc_timestamp semantics in Hive changes some time on the way. The guess is that it happened
// together with change of timestamp semantics at version 3.1.
return getHiveVersionMajor() < 3 || (getHiveVersionMajor() == 3 && getHiveVersionMinor() < 1);
}
@Test(groups = HIVE_VIEWS)
public void testCastTimestampAsDecimal()
{
onHive().executeQuery("DROP TABLE IF EXISTS cast_timestamp_as_decimal");
onHive().executeQuery("CREATE TABLE cast_timestamp_as_decimal (a_timestamp TIMESTAMP)");
onHive().executeQuery("INSERT INTO cast_timestamp_as_decimal VALUES ('1990-01-02 12:13:14.123456789')");
onHive().executeQuery("DROP VIEW IF EXISTS cast_timestamp_as_decimal_view");
onHive().executeQuery("CREATE VIEW cast_timestamp_as_decimal_view AS SELECT CAST(a_timestamp as DECIMAL(10,0)) a_cast_timestamp FROM cast_timestamp_as_decimal");
String testQuery = "SELECT * FROM cast_timestamp_as_decimal_view";
if (getHiveVersionMajor() > 3 || (getHiveVersionMajor() == 3 && getHiveVersionMinor() >= 1)) {
assertViewQuery(
testQuery,
queryAssert -> queryAssert.containsOnly(row(new BigDecimal("631282394"))));
}
else {
// For Hive versions older than 3.1 semantics of cast timestamp to decimal is different and it takes into account timezone Hive VM uses.
// We cannot replicate the behaviour in Trino, hence test only documents different expected results.
assertThat(onTrino().executeQuery(testQuery)).containsOnly(row(new BigDecimal("631282394")));
assertThat(onHive().executeQuery(testQuery)).containsOnly(row(new BigDecimal("631261694")));
}
onHive().executeQuery("DROP VIEW cast_timestamp_as_decimal_view");
onHive().executeQuery("DROP TABLE cast_timestamp_as_decimal");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy