All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.facebook.presto.jdbc.internal.airlift.stats.cardinality.HyperLogLog Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.jdbc.internal.airlift.stats.cardinality;

import com.facebook.presto.jdbc.internal.guava.annotations.VisibleForTesting;
import com.facebook.presto.jdbc.internal.airlift.slice.Murmur3;
import com.facebook.presto.jdbc.internal.airlift.slice.Slice;

import static com.facebook.presto.jdbc.internal.guava.base.Preconditions.checkArgument;
import static com.facebook.presto.jdbc.internal.airlift.stats.cardinality.Utils.indexBitLength;

/**
 * Wrapper that delegates to a backing {@link HllInstance}, which is either a {@code SparseHll}
 * or a {@code DenseHll}.
 *
 * <p>While the backing instance is sparse, it is replaced by its dense equivalent as soon as its
 * estimated in-memory size exceeds the estimated in-memory size of a dense instance with the same
 * index bit length. Instances are obtained through the {@code newInstance} factories.
 */
public class HyperLogLog
{
    // Largest bucket count accepted by newInstance(int).
    private static final int MAX_NUMBER_OF_BUCKETS = 65536;

    // Current backing representation; reassigned whenever a sparse instance is converted to dense.
    private HllInstance instance;

    private HyperLogLog(HllInstance instance)
    {
        this.instance = instance;
    }

    /**
     * Creates an empty instance backed by a sparse representation.
     *
     * @param numberOfBuckets requested bucket count; must not exceed {@code MAX_NUMBER_OF_BUCKETS}
     * @return a new instance backed by a {@code SparseHll}
     * @throws IllegalArgumentException if {@code numberOfBuckets} is above the allowed maximum
     */
    public static HyperLogLog newInstance(int numberOfBuckets)
    {
        checkArgument(
                numberOfBuckets <= MAX_NUMBER_OF_BUCKETS,
                "numberOfBuckets must be <= %s, actual: %s",
                MAX_NUMBER_OF_BUCKETS,
                numberOfBuckets);

        SparseHll sparse = new SparseHll(indexBitLength(numberOfBuckets));
        return new HyperLogLog(sparse);
    }

    /**
     * Rebuilds an instance from its serialized form.
     *
     * @param serialized the serialized data; its first byte is compared against the sparse v1 tag
     * @return a new instance backed by whichever of {@code SparseHll} / {@code DenseHll} accepts the data
     * @throws IllegalArgumentException if the data is tagged as sparse v1, or if neither
     * representation reports that it can deserialize it
     */
    public static HyperLogLog newInstance(Slice serialized)
    {
        boolean taggedSparseV1 = serialized.getByte(0) == Format.SPARSE_V1.getTag();
        checkArgument(!taggedSparseV1, "Sparse v1 encoding no longer supported");

        // The sparse decoder is consulted first, then the dense one.
        HllInstance decoded;
        if (SparseHll.canDeserialize(serialized)) {
            decoded = new SparseHll(serialized);
        }
        else if (DenseHll.canDeserialize(serialized)) {
            decoded = new DenseHll(serialized);
        }
        else {
            throw new IllegalArgumentException("Cannot deserialize HyperLogLog");
        }

        return new HyperLogLog(decoded);
    }

    /**
     * Hashes {@code value} with {@code Murmur3.hash64} and adds the resulting hash.
     */
    public void add(long value)
    {
        long hashed = Murmur3.hash64(value);
        addHash(hashed);
    }

    /**
     * Hashes {@code value} with {@code Murmur3.hash64} and adds the resulting hash.
     */
    public void add(Slice value)
    {
        long hashed = Murmur3.hash64(value);
        addHash(hashed);
    }

    /**
     * Records an already-hashed value in this HyperLogLog instance.
     *
     * @param hash The hash should be the 64 least significant bits of the murmur3_128 hash of the value.
     * For example: com.facebook.presto.jdbc.internal.airlift.slice.Murmur3.hash64(value).
     */
    public void addHash(long hash)
    {
        instance.insertHash(hash);

        if (!(instance instanceof SparseHll)) {
            return;
        }

        // The insert may have grown the sparse form past the dense form's estimated size.
        instance = makeDenseIfNecessary((SparseHll) instance);
    }

    /**
     * Merges the contents of {@code other} into this instance.
     *
     * <p>If both backing instances are sparse they are merged as sparse (and then converted to
     * dense if necessary); otherwise both sides are converted with {@code toDense()} and merged
     * as dense, and this instance ends up backed by the dense result.
     *
     * @param other the instance whose contents are merged into this one
     */
    public void mergeWith(HyperLogLog other)
    {
        boolean bothSparse = instance instanceof SparseHll && other.instance instanceof SparseHll;

        if (!bothSparse) {
            DenseHll merged = instance.toDense();
            DenseHll otherDense = other.instance.toDense();
            merged.mergeWith(otherDense);

            instance = merged;
            return;
        }

        SparseHll sparse = (SparseHll) instance;
        sparse.mergeWith((SparseHll) other.instance);
        instance = makeDenseIfNecessary(sparse);
    }

    /**
     * @return the cardinality reported by the backing instance
     */
    public long cardinality()
    {
        return instance.cardinality();
    }

    /**
     * @return the estimated in-memory size reported by the backing instance
     */
    public int estimatedInMemorySize()
    {
        return instance.estimatedInMemorySize();
    }

    /**
     * @return the estimated serialized size reported by the backing instance
     */
    public int estimatedSerializedSize()
    {
        return instance.estimatedSerializedSize();
    }

    /**
     * @return the serialized form produced by the backing instance
     */
    public Slice serialize()
    {
        return instance.serialize();
    }

    /**
     * Replaces the backing instance with the result of its {@code toDense()} conversion.
     */
    public void makeDense()
    {
        DenseHll dense = instance.toDense();
        instance = dense;
    }

    /**
     * Delegates to the backing instance's {@code verify()}.
     */
    @VisibleForTesting
    void verify()
    {
        instance.verify();
    }

    /**
     * Returns the dense conversion of {@code sparse} when its estimated in-memory size is strictly
     * greater than the estimated in-memory size of a dense instance with the same index bit length;
     * otherwise returns {@code sparse} itself.
     */
    private static HllInstance makeDenseIfNecessary(SparseHll sparse)
    {
        int sparseSize = sparse.estimatedInMemorySize();

        if (sparseSize <= DenseHll.estimatedInMemorySize(sparse.getIndexBitLength())) {
            return sparse;
        }

        return sparse.toDense();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy