All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.presto.orc.CachingStripeMetadataSource Maven / Gradle / Ivy

There is a newer version: 0.291
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.orc;

import com.facebook.presto.common.RuntimeStats;
import com.facebook.presto.orc.StripeReader.StripeId;
import com.facebook.presto.orc.StripeReader.StripeStreamId;
import com.facebook.presto.orc.metadata.MetadataReader;
import com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion;
import com.facebook.presto.orc.metadata.RowGroupIndex;
import com.facebook.presto.orc.metadata.Stream.StreamKind;
import com.facebook.presto.orc.metadata.statistics.HiveBloomFilter;
import com.facebook.presto.orc.stream.OrcInputStream;
import com.google.common.cache.Cache;
import com.google.common.collect.ImmutableMap;
import com.google.common.util.concurrent.UncheckedExecutionException;
import io.airlift.slice.BasicSliceInput;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.concurrent.ExecutionException;

import static com.facebook.presto.common.RuntimeUnit.BYTE;
import static com.facebook.presto.common.RuntimeUnit.NONE;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.BLOOM_FILTER;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.ROW_INDEX;
import static com.google.common.base.Throwables.throwIfInstanceOf;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;

public class CachingStripeMetadataSource
        implements StripeMetadataSource
{
    private final StripeMetadataSource delegate;
    private final Cache footerSliceCache;
    private final Cache stripeStreamCache;
    private final Optional>> rowGroupIndexCache;

    public CachingStripeMetadataSource(StripeMetadataSource delegate, Cache footerSliceCache, Cache stripeStreamCache, Optional>> rowGroupIndexCache)
    {
        this.delegate = requireNonNull(delegate, "delegate is null");
        this.footerSliceCache = requireNonNull(footerSliceCache, "footerSliceCache is null");
        this.stripeStreamCache = requireNonNull(stripeStreamCache, "rowIndexSliceCache is null");
        this.rowGroupIndexCache = requireNonNull(rowGroupIndexCache, "rowGroupIndexCache is null");
    }

    @Override
    public Slice getStripeFooterSlice(OrcDataSource orcDataSource, StripeId stripeId, long footerOffset, int footerLength, boolean cacheable)
            throws IOException
    {
        try {
            if (!cacheable) {
                return delegate.getStripeFooterSlice(orcDataSource, stripeId, footerOffset, footerLength, cacheable);
            }
            return footerSliceCache.get(stripeId, () -> delegate.getStripeFooterSlice(orcDataSource, stripeId, footerOffset, footerLength, cacheable));
        }
        catch (ExecutionException | UncheckedExecutionException e) {
            throwIfInstanceOf(e.getCause(), IOException.class);
            throw new IOException("Unexpected error in stripe footer reading after footerSliceCache miss", e.getCause());
        }
    }

    @Override
    public Map getInputs(OrcDataSource orcDataSource, StripeId stripeId, Map diskRanges, boolean cacheable)
            throws IOException
    {
        if (!cacheable) {
            return delegate.getInputs(orcDataSource, stripeId, diskRanges, cacheable);
        }

        // Fetch existing stream slice from cache
        ImmutableMap.Builder inputsBuilder = ImmutableMap.builder();
        ImmutableMap.Builder uncachedDiskRangesBuilder = ImmutableMap.builder();
        for (Entry entry : diskRanges.entrySet()) {
            if (isCachedStream(entry.getKey().getStreamKind())) {
                Slice streamSlice = stripeStreamCache.getIfPresent(new StripeStreamId(stripeId, entry.getKey()));
                if (streamSlice != null) {
                    inputsBuilder.put(entry.getKey(), new OrcDataSourceInput(new BasicSliceInput(streamSlice), streamSlice.length()));
                }
                else {
                    uncachedDiskRangesBuilder.put(entry);
                }
            }
            else {
                uncachedDiskRangesBuilder.put(entry);
            }
        }

        // read ranges and update cache
        Map uncachedInputs = delegate.getInputs(orcDataSource, stripeId, uncachedDiskRangesBuilder.build(), cacheable);
        for (Entry entry : uncachedInputs.entrySet()) {
            if (isCachedStream(entry.getKey().getStreamKind())) {
                // We need to rewind the input after eagerly reading the slice.
                Slice streamSlice = Slices.wrappedBuffer(entry.getValue().getInput().readSlice(toIntExact(entry.getValue().getInput().length())).getBytes());
                stripeStreamCache.put(new StripeStreamId(stripeId, entry.getKey()), streamSlice);
                inputsBuilder.put(entry.getKey(), new OrcDataSourceInput(new BasicSliceInput(streamSlice), toIntExact(streamSlice.getRetainedSize())));
            }
            else {
                inputsBuilder.put(entry.getKey(), entry.getValue());
            }
        }
        return inputsBuilder.build();
    }

    @Override
    public List getRowIndexes(
            MetadataReader metadataReader,
            HiveWriterVersion hiveWriterVersion,
            StripeId stripId,
            StreamId streamId,
            OrcInputStream inputStream,
            List bloomFilters,
            RuntimeStats runtimeStats)
            throws IOException
    {
        if (rowGroupIndexCache.isPresent()) {
            List rowGroupIndices = rowGroupIndexCache.get().getIfPresent(new StripeStreamId(stripId, streamId));
            if (rowGroupIndices != null) {
                runtimeStats.addMetricValue("OrcRowGroupIndexCacheHit", NONE, 1);
                runtimeStats.addMetricValue("OrcRowGroupIndexInMemoryBytesRead", BYTE, rowGroupIndices.stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum());
                return rowGroupIndices;
            }
            else {
                runtimeStats.addMetricValue("OrcRowGroupIndexCacheHit", NONE, 0);
                runtimeStats.addMetricValue("OrcRowGroupIndexStorageBytesRead", BYTE, inputStream.getRetainedSizeInBytes());
            }
        }
        List rowGroupIndices = delegate.getRowIndexes(metadataReader, hiveWriterVersion, stripId, streamId, inputStream, bloomFilters, runtimeStats);
        if (rowGroupIndexCache.isPresent()) {
            rowGroupIndexCache.get().put(new StripeStreamId(stripId, streamId), rowGroupIndices);
        }
        return rowGroupIndices;
    }

    private static boolean isCachedStream(StreamKind streamKind)
    {
        // BLOOM_FILTER and ROW_INDEX are on the critical path to generate a stripe. Other stream kinds could be lazily read.
        return streamKind == BLOOM_FILTER || streamKind == ROW_INDEX;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy