All downloads are free. The search and download features use the official Maven repository.

io.datakernel.aggregation.measure.HyperLogLog Maven / Gradle / Ivy

/*
 * Copyright (C) 2015 SoftIndex LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.datakernel.aggregation.measure;

import io.datakernel.util.HashUtils;

import static java.lang.Math.exp;
import static java.lang.Math.log;

/**
 * A HyperLogLog-style cardinality sketch backed by an array of byte registers.
 *
 * <p>Each register keeps the largest "rank" (number of trailing zero bits of a
 * hash, plus one) observed for the values routed to it. {@link #estimate()}
 * turns the register contents into an approximate distinct count.
 *
 * <p>Instances are ordered by their current estimate, see
 * {@link #compareTo(HyperLogLog)}.
 */
public final class HyperLogLog implements Comparable<HyperLogLog> {
	private final byte[] registers;

	/**
	 * Creates an empty sketch with the given number of registers, all zero.
	 *
	 * <p>{@link #addLongHash(long)} selects a register by masking with
	 * {@code registers - 1}, so that method only addresses every register
	 * uniformly when the count is a power of two.
	 *
	 * @param registers number of registers to allocate
	 */
	public HyperLogLog(int registers) {
		this.registers = new byte[registers];
	}

	/**
	 * Wraps an existing register array.
	 *
	 * <p>The array is stored as-is (no copy), so later changes made through
	 * this sketch are visible to the caller's array and vice versa.
	 *
	 * @param registers register values to use as the sketch state
	 */
	public HyperLogLog(byte[] registers) {
		this.registers = registers;
	}

	/**
	 * Returns the internal register array itself, not a copy.
	 *
	 * @return the backing register array
	 */
	public byte[] getRegisters() {
		return registers;
	}

	/**
	 * Builds a new sketch whose registers are the element-wise maximum of the
	 * registers of {@code a} and {@code b}. Neither argument is modified.
	 *
	 * <p>Both sketches are expected to have the same number of registers; this
	 * is only checked by an {@code assert}.
	 *
	 * @param a first sketch
	 * @param b second sketch
	 * @return a new sketch holding the merged registers
	 */
	public static HyperLogLog union(HyperLogLog a, HyperLogLog b) {
		assert a.registers.length == b.registers.length;
		byte[] buckets = new byte[a.registers.length];
		for (int i = 0; i < a.registers.length; i++) {
			buckets[i] = a.registers[i] > b.registers[i] ? a.registers[i] : b.registers[i];
		}
		return new HyperLogLog(buckets);
	}

	/**
	 * Merges {@code another} into this sketch in place: every register of this
	 * sketch is raised to the corresponding register of {@code another} when
	 * that one is larger.
	 *
	 * <p>Both sketches are expected to have the same number of registers; this
	 * is only checked by an {@code assert}.
	 *
	 * @param another sketch to merge in; it is not modified
	 */
	public void union(HyperLogLog another) {
		assert this.registers.length == another.registers.length;
		for (int i = 0; i < this.registers.length; i++) {
			byte thisValue = this.registers[i];
			byte thatValue = another.registers[i];
			if (thatValue > thisValue) {
				this.registers[i] = thatValue;
			}
		}
	}

	/**
	 * Records a hash in an explicitly chosen register.
	 *
	 * <p>The rank is the number of trailing zero bits of {@code valueHash}
	 * plus one (so a hash of {@code 0} yields rank 33). The register is
	 * updated only if the new rank is larger than the stored one.
	 *
	 * @param register  index of the register to update
	 * @param valueHash hash bits used to compute the rank
	 */
	public void addToRegister(int register, int valueHash) {
		int zeros = Integer.numberOfTrailingZeros(valueHash) + 1;
		if (registers[register] < zeros) {
			registers[register] = (byte) zeros;
		}
	}

	/**
	 * Records a 64-bit hash: the low 32 bits, masked with
	 * {@code registers.length - 1}, pick the register and the high 32 bits
	 * supply the rank.
	 *
	 * <p>The mask only works as a modulo when the register count is a power
	 * of two.
	 *
	 * @param longHash already-hashed 64-bit value
	 */
	public void addLongHash(long longHash) {
		addToRegister((int) longHash & (registers.length - 1), (int) (longHash >>> 32));
	}

	/**
	 * Records an object via its {@link Object#hashCode()}, which is then
	 * processed like an {@code int} value by {@link #addInt(int)}.
	 *
	 * @param item object to add; must not be {@code null}
	 */
	public void addObject(Object item) {
		addInt(item.hashCode());
	}

	/**
	 * Hashes a {@code long} value with {@code HashUtils.murmur3hash} and
	 * records the result.
	 *
	 * @param value raw value to add
	 */
	public void addLong(long value) {
		addLongHash(HashUtils.murmur3hash(value));
	}

	/**
	 * Hashes an {@code int} value with {@code HashUtils.murmur3hash} and
	 * records the result.
	 *
	 * @param value raw value to add
	 */
	public void addInt(int value) {
		// NOTE(review): presumably the int overload yields a hash with meaningful
		// high 32 bits, since addLongHash takes the rank from them - confirm in HashUtils.
		addLongHash(HashUtils.murmur3hash(value));
	}

	// Bias-correction constants, pre-multiplied by m * m for the fixed register counts.
	private static final double ALPHA_16 = 0.673 * 16 * 16;
	private static final double ALPHA_32 = 0.697 * 32 * 32;
	private static final double ALPHA_64 = 0.709 * 64 * 64;
	// Base constant for every other register count; scaled by m inside estimate().
	private static final double ALPHA_XX = 0.7213;
	// -ln(2), so that exp(value * NLOG2) == 2^(-value).
	private static final double NLOG2 = -log(2.0);

	/**
	 * Computes the approximate number of distinct values recorded so far.
	 *
	 * <p>The raw estimate is {@code alpha * m * m / sum(2^-register)}. When it
	 * is below {@code 2.5 * m} and at least one register is still zero, the
	 * result is replaced by {@code m * ln(m / zeroRegisters)}.
	 *
	 * @return the estimate, truncated to {@code int}
	 */
	public int estimate() {
		int m = registers.length;
		double alpha;
		if (m == 16) {
			alpha = ALPHA_16;
		} else if (m == 32) {
			alpha = ALPHA_32;
		} else if (m == 64) {
			alpha = ALPHA_64;
		} else {
			alpha = ALPHA_XX / (1 + 1.079 / m) * m * m;
		}

		double sum = 0;
		for (byte value : registers) {
			sum += exp(value * NLOG2);
		}
		double estimate = alpha / sum;

		// Small-range branch: fall back to counting empty registers.
		if (estimate < 5.0 / 2.0 * m) {
			int zeroCount = 0;
			for (byte bucket : registers) {
				if (bucket == 0) {
					zeroCount++;
				}
			}
			if (zeroCount != 0) {
				estimate = m * log((double) m / zeroCount);
			}
		}

		return (int) estimate;
	}

	/**
	 * Orders sketches by their current {@link #estimate()}.
	 *
	 * <p>Two sketches with different register contents compare as equal
	 * whenever their estimates coincide.
	 *
	 * @param that sketch to compare against
	 * @return negative, zero or positive as this estimate is less than, equal
	 *         to or greater than the other estimate
	 */
	@Override
	public int compareTo(HyperLogLog that) {
		return Integer.compare(this.estimate(), that.estimate());
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy