All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.common.ndv.hll.HLLDenseRegister Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.common.ndv.hll;

import java.util.Arrays;

public class HLLDenseRegister implements HLLRegister {

  // 2^p number of bytes for register
  private byte[] register;

  // max value stored in registered is cached to determine the bit width for
  // bit packing
  private int maxRegisterValue;

  // number of register bits
  private int p;

  // m = 2^p
  private int m;

  public HLLDenseRegister(int p) {
    this(p, true);
  }

  public HLLDenseRegister(int p, boolean bitPack) {
    this.p = p;
    this.m = 1 << p;
    this.register = new byte[m];
    this.maxRegisterValue = 0;
    if (bitPack == false) {
      this.maxRegisterValue = 0xff;
    }
  }

  public boolean add(long hashcode) {

    // LSB p bits
    final int registerIdx = (int) (hashcode & (m - 1));

    // MSB 64 - p bits
    final long w = hashcode >>> p;

    // longest run of trailing zeroes
    final int lr = Long.numberOfTrailingZeros(w) + 1;
    return set(registerIdx, (byte) lr);
  }

  public boolean set(int idx, byte value) {
    boolean updated = false;
    if (idx < register.length && value > register[idx]) {

      // update max register value
      if (value > maxRegisterValue) {
        maxRegisterValue = value;
      }

      // set register value and compute inverse pow of 2 for register value
      register[idx] = value;

      updated = true;
    }
    return updated;
  }

  public int size() {
    return register.length;
  }

  public int getNumZeroes() {
    int numZeroes = 0;
    for (byte b : register) {
      if (b == 0) {
        numZeroes++;
      }
    }
    return numZeroes;
  }

  public void merge(HLLRegister hllRegister) {
    if (hllRegister instanceof HLLDenseRegister) {
      HLLDenseRegister hdr = (HLLDenseRegister) hllRegister;
      byte[] inRegister = hdr.getRegister();

      // merge only if the register length matches
      if (register.length != inRegister.length) {
        throw new IllegalArgumentException(
            "The size of register sets of HyperLogLogs to be merged does not match.");
      }

      // compare register values and store the max register value
      for (int i = 0; i < inRegister.length; i++) {
        final byte cb = register[i];
        final byte ob = inRegister[i];
        register[i] = ob > cb ? ob : cb;
      }

      // update max register value
      if (hdr.getMaxRegisterValue() > maxRegisterValue) {
        maxRegisterValue = hdr.getMaxRegisterValue();
      }
    } else {
      throw new IllegalArgumentException("Specified register is not instance of HLLDenseRegister");
    }
  }

  public byte[] getRegister() {
    return register;
  }

  public void setRegister(byte[] register) {
    this.register = register;
  }

  public int getMaxRegisterValue() {
    return maxRegisterValue;
  }

  public double getSumInversePow2() {
    double sum = 0;
    for (byte b : register) {
      sum += HLLConstants.inversePow2Data[b];
    }
    return sum;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("HLLDenseRegister - ");
    sb.append("p: ");
    sb.append(p);
    sb.append(" numZeroes: ");
    sb.append(getNumZeroes());
    sb.append(" maxRegisterValue: ");
    sb.append(maxRegisterValue);
    return sb.toString();
  }

  public String toExtendedString() {
    return toString() + " register: " + Arrays.toString(register);
  }

  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof HLLDenseRegister)) {
      return false;
    }
    HLLDenseRegister other = (HLLDenseRegister) obj;
    return maxRegisterValue == other.maxRegisterValue && Arrays.equals(register, other.register);
  }

  @Override
  public int hashCode() {
    int hashcode = 0;
    hashcode += 31 * maxRegisterValue;
    hashcode += Arrays.hashCode(register);
    return hashcode;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy