All downloads are free. Search and download functionality uses the official Maven repository.

com.facebook.presto.orc.DefaultOrcWriterFlushPolicy Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.orc;

import com.facebook.presto.common.Page;
import io.airlift.units.DataSize;

import java.util.Optional;

import static com.facebook.presto.orc.FlushReason.DICTIONARY_FULL;
import static com.facebook.presto.orc.FlushReason.MAX_BYTES;
import static com.facebook.presto.orc.FlushReason.MAX_ROWS;
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static java.lang.Math.max;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;

/**
 * Default {@link OrcWriterFlushPolicy}. A stripe flush is requested when the stripe
 * reaches the configured maximum row count, when the buffered bytes exceed the
 * configured maximum stripe size, or when the caller reports a full dictionary.
 *
 * <p>Instances hold only {@code final} primitive state and are created through
 * {@link #builder()}.
 */
public class DefaultOrcWriterFlushPolicy
        implements OrcWriterFlushPolicy
{
    /** Stripe minimum size used by the builder when none is supplied: 32 MB. */
    public static final DataSize DEFAULT_STRIPE_MIN_SIZE = new DataSize(32, MEGABYTE);
    /** Stripe maximum size used by the builder when none is supplied: 64 MB. */
    public static final DataSize DEFAULT_STRIPE_MAX_SIZE = new DataSize(64, MEGABYTE);
    /** Stripe maximum row count used by the builder when none is supplied. */
    public static final int DEFAULT_STRIPE_MAX_ROW_COUNT = 10_000_000;

    private final int stripeMaxRowCount;
    private final int stripeMinBytes;
    private final int stripeMaxBytes;

    // Arguments are validated by Builder before this constructor is invoked.
    private DefaultOrcWriterFlushPolicy(int stripeMaxRowCount, int stripeMinBytes, int stripeMaxBytes)
    {
        this.stripeMaxRowCount = stripeMaxRowCount;
        this.stripeMinBytes = stripeMinBytes;
        this.stripeMaxBytes = stripeMaxBytes;
    }

    /**
     * Decides whether the current stripe should be flushed.
     *
     * <p>The checks are evaluated in a fixed priority order: row count first, then
     * buffered bytes, then dictionary state. Only the first matching reason is reported.
     *
     * @param stripeRowCount row count of the stripe, compared against the configured maximum row count
     * @param bufferedBytes buffered byte count, compared against the configured maximum stripe size
     * @param dictionaryIsFull {@code true} if the caller considers the dictionary full
     * @return the reason for flushing, or {@link Optional#empty()} if no condition is met
     */
    @Override
    public Optional<FlushReason> shouldFlushStripe(int stripeRowCount, int bufferedBytes, boolean dictionaryIsFull)
    {
        // ">=" rather than "==": if a caller ever overshoots the limit, the stripe
        // must still be flushed for MAX_ROWS instead of silently skipping this check.
        if (stripeRowCount >= stripeMaxRowCount) {
            return Optional.of(MAX_ROWS);
        }
        else if (bufferedBytes > stripeMaxBytes) {
            return Optional.of(MAX_BYTES);
        }
        else if (dictionaryIsFull) {
            return Optional.of(DICTIONARY_FULL);
        }
        return Optional.empty();
    }

    /**
     * Computes how many rows of {@code page} fit in a chunk whose approximate logical
     * size is about half of the maximum stripe size.
     *
     * @param page the page whose approximate logical size and position count are used
     * @return the chunk row count, always at least 1
     */
    @Override
    public int getMaxChunkRowCount(Page page)
    {
        // avoid chunks with huge logical size
        int chunkMaxLogicalBytes = max(1, stripeMaxBytes / 2);
        // A zero position count makes this NaN or Infinity; both collapse to 0 in the
        // int cast below, so the outer max(1, ...) still yields 1.
        double averageLogicalSizePerRow = (double) page.getApproximateLogicalSizeInBytes() / page.getPositionCount();
        // Rows smaller than one byte on average are treated as one byte to bound the quotient.
        return max(1, (int) (chunkMaxLogicalBytes / max(1, averageLogicalSizePerRow)));
    }

    /**
     * @return the configured minimum stripe size in bytes
     */
    @Override
    public int getStripeMinBytes()
    {
        return stripeMinBytes;
    }

    /**
     * @return the configured maximum stripe size in bytes
     */
    @Override
    public int getStripeMaxBytes()
    {
        return stripeMaxBytes;
    }

    /**
     * @return the configured maximum number of rows per stripe
     */
    @Override
    public int getStripeMaxRowCount()
    {
        return stripeMaxRowCount;
    }

    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("stripeMaxRowCount", stripeMaxRowCount)
                .add("stripeMinBytes", stripeMinBytes)
                .add("stripeMaxBytes", stripeMaxBytes)
                .toString();
    }

    /**
     * Creates a builder pre-populated with {@link #DEFAULT_STRIPE_MAX_ROW_COUNT},
     * {@link #DEFAULT_STRIPE_MIN_SIZE} and {@link #DEFAULT_STRIPE_MAX_SIZE}.
     *
     * @return a new builder
     */
    public static Builder builder()
    {
        return new Builder();
    }

    /**
     * Builder for {@link DefaultOrcWriterFlushPolicy}. Obtain one via
     * {@link DefaultOrcWriterFlushPolicy#builder()}.
     */
    public static class Builder
    {
        private int stripeMaxRowCount = DEFAULT_STRIPE_MAX_ROW_COUNT;
        private DataSize stripeMinSize = DEFAULT_STRIPE_MIN_SIZE;
        private DataSize stripeMaxSize = DEFAULT_STRIPE_MAX_SIZE;

        private Builder() {}

        /**
         * Sets the maximum number of rows per stripe.
         *
         * @param stripeMaxRowCount the maximum row count; must be at least 1
         * @return this builder
         * @throws IllegalArgumentException if {@code stripeMaxRowCount} is less than 1
         */
        public Builder withStripeMaxRowCount(int stripeMaxRowCount)
        {
            checkArgument(stripeMaxRowCount >= 1, "stripeMaxRowCount must be at least 1");
            this.stripeMaxRowCount = stripeMaxRowCount;
            return this;
        }

        /**
         * Sets the minimum stripe size.
         *
         * @param stripeMinSize the minimum stripe size; must not be null
         * @return this builder
         * @throws NullPointerException if {@code stripeMinSize} is null
         */
        public Builder withStripeMinSize(DataSize stripeMinSize)
        {
            this.stripeMinSize = requireNonNull(stripeMinSize, "stripeMinSize is null");
            return this;
        }

        /**
         * Sets the maximum stripe size.
         *
         * @param stripeMaxSize the maximum stripe size; must not be null
         * @return this builder
         * @throws NullPointerException if {@code stripeMaxSize} is null
         */
        public Builder withStripeMaxSize(DataSize stripeMaxSize)
        {
            this.stripeMaxSize = requireNonNull(stripeMaxSize, "stripeMaxSize is null");
            return this;
        }

        /**
         * Builds the policy from the configured values.
         *
         * @return a new {@link DefaultOrcWriterFlushPolicy}
         * @throws IllegalArgumentException if the maximum stripe size is smaller than the minimum stripe size
         * @throws ArithmeticException if either size, expressed in bytes, does not fit in an {@code int}
         */
        public DefaultOrcWriterFlushPolicy build()
        {
            // The min/max relationship is checked here rather than in the setters
            // so that the two sizes can be supplied in any order.
            checkArgument(stripeMaxSize.compareTo(stripeMinSize) >= 0, "stripeMaxSize must be greater than or equal to stripeMinSize");
            return new DefaultOrcWriterFlushPolicy(
                    stripeMaxRowCount,
                    toIntExact(stripeMinSize.toBytes()),
                    toIntExact(stripeMaxSize.toBytes()));
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy