All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.prestosql.tests.hive.TestHiveCaching Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.tests.hive;

import io.airlift.units.Duration;
import io.prestosql.tempto.ProductTest;
import io.prestosql.tempto.assertions.QueryAssert.Row;
import io.prestosql.tests.hive.util.CachingTestUtils.CacheStats;
import org.testng.annotations.Test;

import java.util.Collections;
import java.util.Random;

import static io.airlift.testing.Assertions.assertGreaterThan;
import static io.airlift.testing.Assertions.assertGreaterThanOrEqual;
import static io.prestosql.tempto.assertions.QueryAssert.Row.row;
import static io.prestosql.tempto.assertions.QueryAssert.assertThat;
import static io.prestosql.tempto.query.QueryExecutor.query;
import static io.prestosql.tests.TestGroups.HIVE_CACHING;
import static io.prestosql.tests.TestGroups.PROFILE_SPECIFIC_TESTS;
import static io.prestosql.tests.hive.util.CachingTestUtils.getCacheStats;
import static io.prestosql.tests.utils.QueryAssertions.assertEventually;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.testng.Assert.assertEquals;

public class TestHiveCaching
        extends ProductTest
{
    private static final int NUMBER_OF_FILES = 5;

    @Test(groups = {HIVE_CACHING, PROFILE_SPECIFIC_TESTS})
    public void testReadFromCache()
    {
        testReadFromTable("table1");
        testReadFromTable("table2");
    }

    private void testReadFromTable(String tableNameSuffix)
    {
        String cachedTableName = "hive.default.test_cache_read" + tableNameSuffix;
        String nonCachedTableName = "hivenoncached.default.test_cache_read" + tableNameSuffix;

        Row[] tableData = createTestTable(nonCachedTableName);

        CacheStats beforeCacheStats = getCacheStats();
        long initialRemoteReads = beforeCacheStats.getRemoteReads();
        long initialCachedReads = beforeCacheStats.getCachedReads();
        long initialNonLocalReads = beforeCacheStats.getNonLocalReads();
        long initialAsyncDownloadedMb = beforeCacheStats.getAsyncDownloadedMb();

        assertThat(query("SELECT * FROM " + cachedTableName))
                .containsExactly(tableData);

        assertEventually(
                new Duration(20, SECONDS),
                () -> {
                    // first query via caching catalog should fetch remote data
                    CacheStats afterQueryCacheStats = getCacheStats();
                    assertGreaterThanOrEqual(afterQueryCacheStats.getAsyncDownloadedMb(), initialAsyncDownloadedMb + 5);
                    assertGreaterThan(afterQueryCacheStats.getRemoteReads(), initialRemoteReads);
                    assertEquals(afterQueryCacheStats.getCachedReads(), initialCachedReads);
                    assertEquals(afterQueryCacheStats.getNonLocalReads(), initialNonLocalReads);
                });

        assertEventually(
                new Duration(10, SECONDS),
                () -> {
                    CacheStats beforeQueryCacheStats = getCacheStats();
                    long beforeQueryCachedReads = beforeQueryCacheStats.getCachedReads();
                    long beforeQueryRemoteReads = beforeQueryCacheStats.getRemoteReads();
                    long beforeQueryNonLocalReads = beforeQueryCacheStats.getNonLocalReads();

                    assertThat(query("SELECT * FROM " + cachedTableName))
                            .containsExactly(tableData);

                    // query via caching catalog should read exclusively from cache
                    CacheStats afterQueryCacheStats = getCacheStats();
                    assertGreaterThan(afterQueryCacheStats.getCachedReads(), beforeQueryCachedReads);
                    assertEquals(afterQueryCacheStats.getRemoteReads(), beforeQueryRemoteReads);
                    // all reads should be local as Presto would schedule splits on nodes with cached data
                    assertEquals(afterQueryCacheStats.getNonLocalReads(), beforeQueryNonLocalReads);
                });

        query("DROP TABLE " + nonCachedTableName);
    }

    /**
     * Creates table with 5 text files that are larger than 1MB
     */
    private Row[] createTestTable(String tableName)
    {
        StringBuilder randomDataBuilder = new StringBuilder();
        Random random = new Random();
        for (int i = 0; i < 500_000; ++i) {
            randomDataBuilder.append(random.nextInt(10));
        }
        String randomData = randomDataBuilder.toString();

        query("DROP TABLE IF EXISTS " + tableName);
        query("CREATE TABLE " + tableName + " (col varchar) WITH (format='TEXTFILE')");

        for (int i = 0; i < NUMBER_OF_FILES; ++i) {
            // use `format` to overcome SQL query length limit
            query("INSERT INTO " + tableName + " SELECT format('%1$s%1$s%1$s%1$s%1$s', '" + randomData + "')");
        }

        Row row = row(randomData.repeat(5));
        return Collections.nCopies(NUMBER_OF_FILES, row).toArray(new Row[0]);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy