/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.orc;

import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.orc.stream.AbstractDiskOrcDataReader;
import io.trino.orc.stream.MemoryOrcDataReader;
import io.trino.orc.stream.OrcDataReader;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;

import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static io.trino.orc.OrcDataSourceUtils.getDiskRangeSlice;
import static io.trino.orc.OrcDataSourceUtils.mergeAdjacentDiskRanges;
import static java.lang.Math.min;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;

public abstract class AbstractOrcDataSource
        implements OrcDataSource
{
    private final OrcDataSourceId id;
    private final long estimatedSize;
    private final OrcReaderOptions options;
    private long readTimeNanos;
    private long readBytes;

    public AbstractOrcDataSource(OrcDataSourceId id, long estimatedSize, OrcReaderOptions options)
    {
        this.id = requireNonNull(id, "id is null");
        this.estimatedSize = estimatedSize;
        this.options = requireNonNull(options, "options is null");
    }

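    /**
     * Default tail read: the requested length is clamped to the estimated file
     * size, and the tail is fetched with a single {@link #readFully(long, int)} call.
     */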
    protected Slice readTailInternal(int length)
            throws IOException
    {
        int readSize = toIntExact(min(estimatedSize, length));
        return readFully(estimatedSize - readSize, readSize);
    }

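    /**
     * Performs the actual read against the underlying storage. Implementations
     * must completely fill {@code bufferLength} bytes of {@code buffer},
     * starting at {@code bufferOffset}, or throw.
     */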
    protected abstract void readInternal(long position, byte[] buffer, int bufferOffset, int bufferLength)
            throws IOException;

    @Override
    public OrcDataSourceId getId()
    {
        return id;
    }

    @Override
    public final long getReadBytes()
    {
        return readBytes;
    }

    @Override
    public final long getReadTimeNanos()
    {
        return readTimeNanos;
    }

    @Override
    public final long getEstimatedSize()
    {
        return estimatedSize;
    }

    @Override
    public Slice readTail(int length)
            throws IOException
    {
        long start = System.nanoTime();

        Slice tailSlice = readTailInternal(length);

        readTimeNanos += System.nanoTime() - start;
        readBytes += tailSlice.length();

        return tailSlice;
    }

    @Override
    public long getRetainedSize()
    {
        return 0;
    }

    @Override
    public final Slice readFully(long position, int length)
            throws IOException
    {
        byte[] buffer = new byte[length];
        readFully(position, buffer, 0, length);
        return Slices.wrappedBuffer(buffer);
    }

    private void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength)
            throws IOException
    {
        long start = System.nanoTime();

        readInternal(position, buffer, bufferOffset, bufferLength);

        readTimeNanos += System.nanoTime() - start;
        readBytes += bufferLength;
    }

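    /**
     * Plans and executes reads for a batch of disk ranges, returning one
     * {@link OrcDataReader} per input key. Ranges at most the configured max
     * buffer size are treated as "small" and merged into shared reads; larger
     * ranges are streamed from disk in chunks.
     */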
    @Override
    public final <K> Map<K, OrcDataReader> readFully(Map<K, DiskRange> diskRanges)
            throws IOException
    {
        requireNonNull(diskRanges, "diskRanges is null");

        if (diskRanges.isEmpty()) {
            return ImmutableMap.of();
        }

        //
        // Note: this code does not use the stream APIs to avoid any extra object allocation
        //

        // split disk ranges into "big" and "small"
        long maxReadSizeBytes = options.getMaxBufferSize().toBytes();
        ImmutableMap.Builder<K, DiskRange> smallRangesBuilder = ImmutableMap.builder();
        ImmutableMap.Builder<K, DiskRange> largeRangesBuilder = ImmutableMap.builder();
        for (Entry<K, DiskRange> entry : diskRanges.entrySet()) {
            if (entry.getValue().getLength() <= maxReadSizeBytes) {
                smallRangesBuilder.put(entry);
            }
            else {
                largeRangesBuilder.put(entry);
            }
        }
        Map<K, DiskRange> smallRanges = smallRangesBuilder.buildOrThrow();
        Map<K, DiskRange> largeRanges = largeRangesBuilder.buildOrThrow();

        // read ranges
        ImmutableMap.Builder<K, OrcDataReader> slices = ImmutableMap.builder();
        slices.putAll(readSmallDiskRanges(smallRanges));
        slices.putAll(readLargeDiskRanges(largeRanges));

        return slices.buildOrThrow();
    }

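    /**
     * Adjacent small ranges (within the configured merge distance) are merged so
     * each merged range costs a single read. With lazy reads enabled, a merged
     * buffer is loaded only when one of its ranges is first accessed.
     */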
    private <K> Map<K, OrcDataReader> readSmallDiskRanges(Map<K, DiskRange> diskRanges)
            throws IOException
    {
        if (diskRanges.isEmpty()) {
            return ImmutableMap.of();
        }

        Iterable<DiskRange> mergedRanges = mergeAdjacentDiskRanges(diskRanges.values(), options.getMaxMergeDistance(), options.getMaxBufferSize());

        ImmutableMap.Builder<K, OrcDataReader> slices = ImmutableMap.builder();
        if (options.isLazyReadSmallRanges()) {
            for (DiskRange mergedRange : mergedRanges) {
                LazyBufferLoader mergedRangeLazyLoader = new LazyBufferLoader(mergedRange);
                for (Entry<K, DiskRange> diskRangeEntry : diskRanges.entrySet()) {
                    DiskRange diskRange = diskRangeEntry.getValue();
                    if (mergedRange.contains(diskRange)) {
                        slices.put(diskRangeEntry.getKey(), new MergedOrcDataReader(id, diskRange, mergedRangeLazyLoader));
                    }
                }
            }
        }
        else {
            Map<DiskRange, Slice> buffers = new LinkedHashMap<>();
            for (DiskRange mergedRange : mergedRanges) {
                // read full range in one request
                Slice buffer = readFully(mergedRange.getOffset(), mergedRange.getLength());
                buffers.put(mergedRange, buffer);
            }

            for (Entry<K, DiskRange> entry : diskRanges.entrySet()) {
                slices.put(entry.getKey(), new MemoryOrcDataReader(id, getDiskRangeSlice(entry.getValue(), buffers), entry.getValue().getLength()));
            }
        }

        Map<K, OrcDataReader> sliceStreams = slices.buildOrThrow();
        verify(sliceStreams.keySet().equals(diskRanges.keySet()));
        return sliceStreams;
    }

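    /**
     * Large ranges are never buffered whole up front; each is wrapped in a
     * {@link DiskOrcDataReader} that pages data in as it is consumed.
     */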
    private <K> Map<K, OrcDataReader> readLargeDiskRanges(Map<K, DiskRange> diskRanges)
    {
        if (diskRanges.isEmpty()) {
            return ImmutableMap.of();
        }

        ImmutableMap.Builder<K, OrcDataReader> slices = ImmutableMap.builder();
        for (Entry<K, DiskRange> entry : diskRanges.entrySet()) {
            DiskRange diskRange = entry.getValue();
            slices.put(entry.getKey(), new DiskOrcDataReader(diskRange));
        }
        return slices.buildOrThrow();
    }

    @Override
    public final String toString()
    {
        return id.toString();
    }

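    /**
     * Loads the buffer for a merged disk range on first use and hands out
     * slices of it for the nested ranges it covers.
     */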
    private final class LazyBufferLoader
    {
        private final DiskRange diskRange;
        private Slice bufferSlice;

        public LazyBufferLoader(DiskRange diskRange)
        {
            this.diskRange = requireNonNull(diskRange, "diskRange is null");
        }

        public Slice loadNestedDiskRangeBuffer(DiskRange nestedDiskRange)
        {
            load();

            checkArgument(diskRange.contains(nestedDiskRange));
            int offset = toIntExact(nestedDiskRange.getOffset() - diskRange.getOffset());
            return bufferSlice.slice(offset, nestedDiskRange.getLength());
        }

        private void load()
        {
            if (bufferSlice != null) {
                return;
            }
            try {
                bufferSlice = readFully(diskRange.getOffset(), diskRange.getLength());
            }
            catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        }
    }

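    /**
     * Reader for a small disk range whose bytes come from a lazily loaded
     * buffer shared with the other ranges in the same merged read.
     */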
    private final class MergedOrcDataReader
            implements OrcDataReader
    {
        private final OrcDataSourceId orcDataSourceId;
        private final DiskRange diskRange;
        private final LazyBufferLoader lazyBufferLoader;
        private Slice data;

        public MergedOrcDataReader(OrcDataSourceId orcDataSourceId, DiskRange diskRange, LazyBufferLoader lazyBufferLoader)
        {
            this.orcDataSourceId = requireNonNull(orcDataSourceId, "orcDataSourceId is null");
            this.diskRange = requireNonNull(diskRange, "diskRange is null");
            this.lazyBufferLoader = requireNonNull(lazyBufferLoader, "lazyBufferLoader is null");
        }

        @Override
        public OrcDataSourceId getOrcDataSourceId()
        {
            return orcDataSourceId;
        }

        @Override
        public long getRetainedSize()
        {
            return data == null ? 0 : diskRange.getLength();
        }

        @Override
        public int getSize()
        {
            return diskRange.getLength();
        }

        @Override
        public int getMaxBufferSize()
        {
            return diskRange.getLength();
        }

        @Override
        public Slice seekBuffer(int newPosition)
                throws IOException
        {
            if (data == null) {
                data = lazyBufferLoader.loadNestedDiskRangeBuffer(diskRange);
                if (data == null) {
                    throw new OrcCorruptionException(id, "Data loader returned null");
                }
                if (data.length() != diskRange.getLength()) {
                    throw new OrcCorruptionException(id, "Expected to load %s bytes, but %s bytes were loaded", diskRange.getLength(), data.length());
                }
            }
            return data.slice(newPosition, data.length() - newPosition);
        }

        @Override
        public String toString()
        {
            return toStringHelper(this)
                    .add("orcDataSourceId", orcDataSourceId)
                    .add("diskRange", diskRange)
                    .toString();
        }
    }

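    /**
     * Reader for a large disk range that reads stream-buffer-sized chunks
     * directly from the data source instead of holding the whole range in memory.
     */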
    private class DiskOrcDataReader
            extends AbstractDiskOrcDataReader
    {
        private final DiskRange diskRange;

        public DiskOrcDataReader(DiskRange diskRange)
        {
            super(id, diskRange.getLength(), toIntExact(options.getStreamBufferSize().toBytes()));
            this.diskRange = diskRange;
        }

        @Override
        public void read(long position, byte[] buffer, int bufferOffset, int length)
                throws IOException
        {
            readFully(diskRange.getOffset() + position, buffer, bufferOffset, length);
        }

        @Override
        public String toString()
        {
            return toStringHelper(this)
                    .add("orcDataSourceId", getOrcDataSourceId())
                    .add("diskRange", diskRange)
                    .add("maxBufferSize", getMaxBufferSize())
                    .toString();
        }
    }
}
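
/*
 * Illustrative sketch only, not part of this file's API: a minimal concrete
 * subclass backed by a local file, showing how readInternal() might be
 * implemented. The class name and file handling below are assumptions;
 * concrete data sources follow the same pattern of positioned, fully-filled
 * reads. Timing and byte accounting are inherited from the base class.
 */
class ExampleFileOrcDataSource
        extends AbstractOrcDataSource
{
    private final java.io.RandomAccessFile input;

    public ExampleFileOrcDataSource(java.io.File path, OrcReaderOptions options)
            throws java.io.FileNotFoundException
    {
        // The file length doubles as the estimated size used for tail reads
        super(new OrcDataSourceId(path.getPath()), path.length(), options);
        this.input = new java.io.RandomAccessFile(path, "r");
    }

    @Override
    public void close()
            throws IOException
    {
        input.close();
    }

    @Override
    protected void readInternal(long position, byte[] buffer, int bufferOffset, int bufferLength)
            throws IOException
    {
        // Seek to the absolute position, then fill the requested range
        // completely; readFully throws if the file ends early
        input.seek(position);
        input.readFully(buffer, bufferOffset, bufferLength);
    }
}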