All downloads are free. Search and download functionality uses the official Maven repository.

io.trino.orc.OrcWriterOptions Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.orc;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import io.airlift.units.DataSize;

import java.util.Set;

import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.units.DataSize.Unit.KILOBYTE;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static java.util.Objects.requireNonNull;

/**
 * Configuration values for writing ORC files.
 *
 * <p>Instances are not modified after construction: every field is {@code final}, the set of
 * bloom filter columns is stored as an immutable copy, and each {@code with...} method returns
 * a new instance rather than changing the receiver. New instances are obtained from the
 * no-argument constructor (all defaults), from the {@code with...} methods, or from a
 * {@link Builder}.
 */
public class OrcWriterOptions
{
    public enum WriterIdentification
    {
        /**
         * Write ORC files with a writer identification and version number that is readable by Hive 2.0.0 to 2.2.0
         */
        LEGACY_HIVE_COMPATIBLE,

        /**
         * Write ORC files with Trino writer identification.
         */
        TRINO,
    }

    @VisibleForTesting
    static final DataSize DEFAULT_MAX_STRING_STATISTICS_LIMIT = DataSize.ofBytes(64);
    @VisibleForTesting
    static final DataSize DEFAULT_MAX_COMPRESSION_BUFFER_SIZE = DataSize.of(256, KILOBYTE);
    private static final double DEFAULT_BLOOM_FILTER_FPP = 0.05;
    private static final DataSize DEFAULT_STRIPE_MIN_SIZE = DataSize.of(32, MEGABYTE);
    private static final DataSize DEFAULT_STRIPE_MAX_SIZE = DataSize.of(64, MEGABYTE);
    private static final int DEFAULT_STRIPE_MAX_ROW_COUNT = 10_000_000;
    private static final int DEFAULT_ROW_GROUP_MAX_ROW_COUNT = 10_000;
    private static final DataSize DEFAULT_DICTIONARY_MAX_MEMORY = DataSize.of(16, MEGABYTE);

    private final WriterIdentification writerIdentification;
    private final DataSize stripeMinSize;
    private final DataSize stripeMaxSize;
    private final int stripeMaxRowCount;
    private final int rowGroupMaxRowCount;
    private final DataSize dictionaryMaxMemory;
    private final DataSize maxStringStatisticsLimit;
    private final DataSize maxCompressionBufferSize;
    private final Set<String> bloomFilterColumns;
    private final double bloomFilterFpp;
    private final boolean shouldCompactMinMax;

    /**
     * Creates options populated with the defaults declared in this class: {@link WriterIdentification#TRINO},
     * stripe size 32MB to 64MB, at most 10,000,000 rows per stripe and 10,000 rows per row group,
     * 16MB of dictionary memory, a 64 byte string statistics limit, a 256kB compression buffer,
     * no bloom filter columns, a bloom filter false positive probability of 0.05, and min/max
     * compaction enabled.
     */
    public OrcWriterOptions()
    {
        this(
                WriterIdentification.TRINO,
                DEFAULT_STRIPE_MIN_SIZE,
                DEFAULT_STRIPE_MAX_SIZE,
                DEFAULT_STRIPE_MAX_ROW_COUNT,
                DEFAULT_ROW_GROUP_MAX_ROW_COUNT,
                DEFAULT_DICTIONARY_MAX_MEMORY,
                DEFAULT_MAX_STRING_STATISTICS_LIMIT,
                DEFAULT_MAX_COMPRESSION_BUFFER_SIZE,
                ImmutableSet.of(),
                DEFAULT_BLOOM_FILTER_FPP,
                true);
    }

    /**
     * Validating constructor used by the default constructor and by {@link Builder#build()}.
     *
     * @throws NullPointerException if any reference argument is null
     * @throws IllegalArgumentException if a row count is below 1, or if {@code bloomFilterFpp}
     * is not strictly between 0.0 and 1.0
     */
    private OrcWriterOptions(
            WriterIdentification writerIdentification,
            DataSize stripeMinSize,
            DataSize stripeMaxSize,
            int stripeMaxRowCount,
            int rowGroupMaxRowCount,
            DataSize dictionaryMaxMemory,
            DataSize maxStringStatisticsLimit,
            DataSize maxCompressionBufferSize,
            Set<String> bloomFilterColumns,
            double bloomFilterFpp,
            boolean shouldCompactMinMax)
    {
        requireNonNull(stripeMinSize, "stripeMinSize is null");
        requireNonNull(stripeMaxSize, "stripeMaxSize is null");
        checkArgument(stripeMaxRowCount >= 1, "stripeMaxRowCount must be at least 1");
        checkArgument(rowGroupMaxRowCount >= 1, "rowGroupMaxRowCount must be at least 1");
        requireNonNull(dictionaryMaxMemory, "dictionaryMaxMemory is null");
        requireNonNull(maxStringStatisticsLimit, "maxStringStatisticsLimit is null");
        requireNonNull(maxCompressionBufferSize, "maxCompressionBufferSize is null");
        requireNonNull(bloomFilterColumns, "bloomFilterColumns is null");
        // The comparison form also rejects NaN, since NaN fails both inequalities.
        checkArgument(bloomFilterFpp > 0.0 && bloomFilterFpp < 1.0, "bloomFilterFpp should be > 0.0 & < 1.0");

        this.writerIdentification = requireNonNull(writerIdentification, "writerIdentification is null");
        this.stripeMinSize = stripeMinSize;
        this.stripeMaxSize = stripeMaxSize;
        this.stripeMaxRowCount = stripeMaxRowCount;
        this.rowGroupMaxRowCount = rowGroupMaxRowCount;
        this.dictionaryMaxMemory = dictionaryMaxMemory;
        this.maxStringStatisticsLimit = maxStringStatisticsLimit;
        this.maxCompressionBufferSize = maxCompressionBufferSize;
        // Snapshot the set so later changes to the caller's collection cannot affect this instance.
        this.bloomFilterColumns = ImmutableSet.copyOf(bloomFilterColumns);
        this.bloomFilterFpp = bloomFilterFpp;
        this.shouldCompactMinMax = shouldCompactMinMax;
    }

    /**
     * Returns the configured writer identification.
     */
    public WriterIdentification getWriterIdentification()
    {
        return writerIdentification;
    }

    /**
     * Returns a copy of these options with the given writer identification.
     *
     * @throws NullPointerException if {@code writerIdentification} is null
     */
    public OrcWriterOptions withWriterIdentification(WriterIdentification writerIdentification)
    {
        return builderFrom(this)
                .setWriterIdentification(writerIdentification)
                .build();
    }

    /**
     * Returns the configured minimum stripe size.
     */
    public DataSize getStripeMinSize()
    {
        return stripeMinSize;
    }

    /**
     * Returns a copy of these options with the given minimum stripe size.
     *
     * @throws NullPointerException if {@code stripeMinSize} is null
     */
    public OrcWriterOptions withStripeMinSize(DataSize stripeMinSize)
    {
        return builderFrom(this)
                .setStripeMinSize(stripeMinSize)
                .build();
    }

    /**
     * Returns the configured maximum stripe size.
     */
    public DataSize getStripeMaxSize()
    {
        return stripeMaxSize;
    }

    /**
     * Returns a copy of these options with the given maximum stripe size.
     *
     * @throws NullPointerException if {@code stripeMaxSize} is null
     */
    public OrcWriterOptions withStripeMaxSize(DataSize stripeMaxSize)
    {
        return builderFrom(this)
                .setStripeMaxSize(stripeMaxSize)
                .build();
    }

    /**
     * Returns the configured maximum number of rows per stripe.
     */
    public int getStripeMaxRowCount()
    {
        return stripeMaxRowCount;
    }

    /**
     * Returns a copy of these options with the given maximum number of rows per stripe.
     *
     * @throws IllegalArgumentException if {@code stripeMaxRowCount} is less than 1
     */
    public OrcWriterOptions withStripeMaxRowCount(int stripeMaxRowCount)
    {
        return builderFrom(this)
                .setStripeMaxRowCount(stripeMaxRowCount)
                .build();
    }

    /**
     * Returns the configured maximum number of rows per row group.
     */
    public int getRowGroupMaxRowCount()
    {
        return rowGroupMaxRowCount;
    }

    /**
     * Returns a copy of these options with the given maximum number of rows per row group.
     *
     * @throws IllegalArgumentException if {@code rowGroupMaxRowCount} is less than 1
     */
    public OrcWriterOptions withRowGroupMaxRowCount(int rowGroupMaxRowCount)
    {
        return builderFrom(this)
                .setRowGroupMaxRowCount(rowGroupMaxRowCount)
                .build();
    }

    /**
     * Returns the configured dictionary memory limit.
     */
    public DataSize getDictionaryMaxMemory()
    {
        return dictionaryMaxMemory;
    }

    /**
     * Returns a copy of these options with the given dictionary memory limit.
     *
     * @throws NullPointerException if {@code dictionaryMaxMemory} is null
     */
    public OrcWriterOptions withDictionaryMaxMemory(DataSize dictionaryMaxMemory)
    {
        return builderFrom(this)
                .setDictionaryMaxMemory(dictionaryMaxMemory)
                .build();
    }

    /**
     * Returns the configured string statistics limit.
     */
    public DataSize getMaxStringStatisticsLimit()
    {
        return maxStringStatisticsLimit;
    }

    /**
     * Returns a copy of these options with the given string statistics limit.
     *
     * @throws NullPointerException if {@code maxStringStatisticsLimit} is null
     */
    public OrcWriterOptions withMaxStringStatisticsLimit(DataSize maxStringStatisticsLimit)
    {
        return builderFrom(this)
                .setMaxStringStatisticsLimit(maxStringStatisticsLimit)
                .build();
    }

    /**
     * Returns the configured maximum compression buffer size.
     */
    public DataSize getMaxCompressionBufferSize()
    {
        return maxCompressionBufferSize;
    }

    /**
     * Returns a copy of these options with the given maximum compression buffer size.
     *
     * @throws NullPointerException if {@code maxCompressionBufferSize} is null
     */
    public OrcWriterOptions withMaxCompressionBufferSize(DataSize maxCompressionBufferSize)
    {
        return builderFrom(this)
                .setMaxCompressionBufferSize(maxCompressionBufferSize)
                .build();
    }

    /**
     * Returns whether {@code columnName} is a member of the configured bloom filter column set.
     */
    public boolean isBloomFilterColumn(String columnName)
    {
        return bloomFilterColumns.contains(columnName);
    }

    /**
     * Returns a copy of these options with the given bloom filter column names.
     * The set is copied, so later changes to the argument have no effect on the result.
     *
     * @throws NullPointerException if {@code bloomFilterColumns} is null
     */
    public OrcWriterOptions withBloomFilterColumns(Set<String> bloomFilterColumns)
    {
        return builderFrom(this)
                .setBloomFilterColumns(bloomFilterColumns)
                .build();
    }

    /**
     * Returns the configured bloom filter false positive probability.
     */
    public double getBloomFilterFpp()
    {
        return bloomFilterFpp;
    }

    /**
     * Returns a copy of these options with the given bloom filter false positive probability.
     *
     * @throws IllegalArgumentException if {@code bloomFilterFpp} is not strictly between 0.0 and 1.0
     */
    public OrcWriterOptions withBloomFilterFpp(double bloomFilterFpp)
    {
        return builderFrom(this)
                .setBloomFilterFpp(bloomFilterFpp)
                .build();
    }

    /**
     * Returns the configured value of the min/max compaction flag.
     */
    public boolean isShouldCompactMinMax()
    {
        return shouldCompactMinMax;
    }

    /**
     * Returns a copy of these options with the given min/max compaction flag.
     */
    public OrcWriterOptions withShouldCompactMinMax(boolean shouldCompactMinMax)
    {
        return builderFrom(this)
                .setShouldCompactMinMax(shouldCompactMinMax)
                .build();
    }

    /**
     * Returns a debugging representation listing every option held by this instance.
     */
    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("writerIdentification", writerIdentification)
                .add("stripeMinSize", stripeMinSize)
                .add("stripeMaxSize", stripeMaxSize)
                .add("stripeMaxRowCount", stripeMaxRowCount)
                .add("rowGroupMaxRowCount", rowGroupMaxRowCount)
                .add("dictionaryMaxMemory", dictionaryMaxMemory)
                .add("maxStringStatisticsLimit", maxStringStatisticsLimit)
                .add("maxCompressionBufferSize", maxCompressionBufferSize)
                .add("bloomFilterColumns", bloomFilterColumns)
                .add("bloomFilterFpp", bloomFilterFpp)
                .add("shouldCompactMinMax", shouldCompactMinMax)
                .toString();
    }

    /**
     * Returns a builder pre-populated with the default options.
     */
    public static Builder builder()
    {
        return builderFrom(new OrcWriterOptions());
    }

    /**
     * Returns a builder pre-populated with the values of {@code options}.
     *
     * @throws NullPointerException if {@code options} is null
     */
    public static Builder builderFrom(OrcWriterOptions options)
    {
        return new Builder(options);
    }

    /**
     * Mutable builder for {@link OrcWriterOptions}. Apart from the null check in
     * {@link #setBloomFilterColumns(Set)}, values are only validated when {@link #build()} is called.
     */
    public static final class Builder
    {
        private WriterIdentification writerIdentification;
        private DataSize stripeMinSize;
        private DataSize stripeMaxSize;
        private int stripeMaxRowCount;
        private int rowGroupMaxRowCount;
        private DataSize dictionaryMaxMemory;
        private DataSize maxStringStatisticsLimit;
        private DataSize maxCompressionBufferSize;
        private Set<String> bloomFilterColumns;
        private double bloomFilterFpp;
        private boolean shouldCompactMinMax;

        private Builder(OrcWriterOptions options)
        {
            requireNonNull(options, "options is null");

            this.writerIdentification = options.writerIdentification;
            this.stripeMinSize = options.stripeMinSize;
            this.stripeMaxSize = options.stripeMaxSize;
            this.stripeMaxRowCount = options.stripeMaxRowCount;
            this.rowGroupMaxRowCount = options.rowGroupMaxRowCount;
            this.dictionaryMaxMemory = options.dictionaryMaxMemory;
            this.maxStringStatisticsLimit = options.maxStringStatisticsLimit;
            this.maxCompressionBufferSize = options.maxCompressionBufferSize;
            this.bloomFilterColumns = ImmutableSet.copyOf(options.bloomFilterColumns);
            this.bloomFilterFpp = options.bloomFilterFpp;
            this.shouldCompactMinMax = options.shouldCompactMinMax;
        }

        public Builder setWriterIdentification(WriterIdentification writerIdentification)
        {
            this.writerIdentification = writerIdentification;
            return this;
        }

        public Builder setStripeMinSize(DataSize stripeMinSize)
        {
            this.stripeMinSize = stripeMinSize;
            return this;
        }

        public Builder setStripeMaxSize(DataSize stripeMaxSize)
        {
            this.stripeMaxSize = stripeMaxSize;
            return this;
        }

        public Builder setStripeMaxRowCount(int stripeMaxRowCount)
        {
            this.stripeMaxRowCount = stripeMaxRowCount;
            return this;
        }

        public Builder setRowGroupMaxRowCount(int rowGroupMaxRowCount)
        {
            this.rowGroupMaxRowCount = rowGroupMaxRowCount;
            return this;
        }

        public Builder setDictionaryMaxMemory(DataSize dictionaryMaxMemory)
        {
            this.dictionaryMaxMemory = dictionaryMaxMemory;
            return this;
        }

        public Builder setMaxStringStatisticsLimit(DataSize maxStringStatisticsLimit)
        {
            this.maxStringStatisticsLimit = maxStringStatisticsLimit;
            return this;
        }

        public Builder setMaxCompressionBufferSize(DataSize maxCompressionBufferSize)
        {
            this.maxCompressionBufferSize = maxCompressionBufferSize;
            return this;
        }

        /**
         * Sets the bloom filter column names. The set is copied immediately, so changes made
         * to the argument after this call are not picked up by {@link #build()}.
         *
         * @throws NullPointerException if {@code bloomFilterColumns} is null
         */
        public Builder setBloomFilterColumns(Set<String> bloomFilterColumns)
        {
            this.bloomFilterColumns = ImmutableSet.copyOf(requireNonNull(bloomFilterColumns, "bloomFilterColumns is null"));
            return this;
        }

        public Builder setBloomFilterFpp(double bloomFilterFpp)
        {
            this.bloomFilterFpp = bloomFilterFpp;
            return this;
        }

        public Builder setShouldCompactMinMax(boolean shouldCompactMinMax)
        {
            this.shouldCompactMinMax = shouldCompactMinMax;
            return this;
        }

        /**
         * Creates the options from the current builder state.
         *
         * @throws NullPointerException if any reference value is null
         * @throws IllegalArgumentException if a row count is below 1, or if the bloom filter
         * false positive probability is not strictly between 0.0 and 1.0
         */
        public OrcWriterOptions build()
        {
            return new OrcWriterOptions(
                    writerIdentification,
                    stripeMinSize,
                    stripeMaxSize,
                    stripeMaxRowCount,
                    rowGroupMaxRowCount,
                    dictionaryMaxMemory,
                    maxStringStatisticsLimit,
                    maxCompressionBufferSize,
                    bloomFilterColumns,
                    bloomFilterFpp,
                    shouldCompactMinMax);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy