All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch Maven / Gradle / Ivy

Go to download

Core sketch algorithms used alone and by other Java repositories in the DataSketches library.

There is a newer version: 6.1.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.tuple.arrayofdoubles;

import static org.apache.datasketches.common.Util.LS;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.thetacommon.BinomialBoundsN;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.datasketches.tuple.SerializerDeserializer;

/**
 * The base class for the tuple sketch of type ArrayOfDoubles, where an array of double values
 * is associated with each key.
 * A primitive array of doubles is used here, as opposed to a generic Summary object,
 * for improved performance.
 */
public abstract class ArrayOfDoublesSketch {

  // The concept of being empty is about representing an empty set.
  // So a sketch can be non-empty, and have no entries.
  // For example, as a result of a sampling, when some data was presented to the sketch, but no
  //  entries were retained.
  static enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES }

  static final int SIZE_OF_KEY_BYTES = Long.BYTES;
  static final int SIZE_OF_VALUE_BYTES = Double.BYTES;

  // Common Layout of first 16 bytes and Empty AoDCompactSketch:
  // Long || Start Byte Adr:
  // Adr:
  //      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
  //  0   ||    Seed Hash    | #Dbls  |  Flags | SkType | FamID  | SerVer |  Preamble_Longs    |
  //      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
  //  1   ||-------------------------Theta Long------------------------------------------------|

  static final int PREAMBLE_LONGS_BYTE = 0; // not used, always 1
  static final int SERIAL_VERSION_BYTE = 1;
  static final int FAMILY_ID_BYTE = 2;
  static final int SKETCH_TYPE_BYTE = 3;
  static final int FLAGS_BYTE = 4;
  static final int NUM_VALUES_BYTE = 5;
  static final int SEED_HASH_SHORT = 6;
  static final int THETA_LONG = 8;

  final int numValues_;

  long thetaLong_;
  boolean isEmpty_ = true;

  ArrayOfDoublesSketch(final int numValues) {
    numValues_ = numValues;
  }

  /**
   * Heapify the given Memory as an ArrayOfDoublesSketch
   * @param mem the given Memory
   * @return an ArrayOfDoublesSketch
   */
  public static ArrayOfDoublesSketch heapify(final Memory mem) {
    return heapify(mem, ThetaUtil.DEFAULT_UPDATE_SEED);
  }

  /**
   * Heapify the given Memory and seed as a ArrayOfDoublesSketch
   * @param mem the given Memory
   * @param seed the given seed
   * @return an ArrayOfDoublesSketch
   */
  public static ArrayOfDoublesSketch heapify(final Memory mem, final long seed) {
    final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(mem);
    if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) {
      return new HeapArrayOfDoublesQuickSelectSketch(mem, seed);
    }
    return new HeapArrayOfDoublesCompactSketch(mem, seed);
  }

  /**
   * Wrap the given Memory as an ArrayOfDoublesSketch
   * @param mem the given Memory
   * @return an ArrayOfDoublesSketch
   */
  public static ArrayOfDoublesSketch wrap(final Memory mem) {
    return wrap(mem, ThetaUtil.DEFAULT_UPDATE_SEED);
  }

  /**
   * Wrap the given Memory and seed as a ArrayOfDoublesSketch
   * @param mem the given Memory
   * @param seed the given seed
   * @return an ArrayOfDoublesSketch
   */
  public static ArrayOfDoublesSketch wrap(final Memory mem, final long seed) {
    final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(mem);
    if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) {
      return new DirectArrayOfDoublesQuickSelectSketchR(mem, seed);
    }
    return new DirectArrayOfDoublesCompactSketch(mem, seed);
  }

  /**
   * Estimates the cardinality of the set (number of unique values presented to the sketch)
   * @return best estimate of the number of unique values
   */
  public double getEstimate() {
    if (!isEstimationMode()) { return getRetainedEntries(); }
    return getRetainedEntries() / getTheta();
  }

  /**
   * Gets the approximate upper error bound given the specified number of Standard Deviations.
   * This will return getEstimate() if isEmpty() is true.
   *
   * @param numStdDev
   * See Number of Standard Deviations
   * @return the upper bound.
   */
  public double getUpperBound(final int numStdDev) {
    if (!isEstimationMode()) { return getRetainedEntries(); }
    return BinomialBoundsN.getUpperBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
  }

  /**
   * Gets the approximate lower error bound given the specified number of Standard Deviations.
   * This will return getEstimate() if isEmpty() is true.
   *
   * @param numStdDev
   * See Number of Standard Deviations
   * @return the lower bound.
   */
  public double getLowerBound(final int numStdDev) {
    if (!isEstimationMode()) { return getRetainedEntries(); }
    return BinomialBoundsN.getLowerBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
  }

  /**
   * Returns true if this sketch's data structure is backed by Memory or WritableMemory.
   * @return true if this sketch's data structure is backed by Memory or WritableMemory.
   */
  public abstract boolean hasMemory();

  /**
   * Returns the Memory object if it exists, otherwise null.
   * @return the Memory object if it exists, otherwise null.
   */
  abstract Memory getMemory();

  /**
   * See Empty
   * @return true if empty.
   */
  public boolean isEmpty() {
    return isEmpty_;
  }

  /**
   * @return number of double values associated with each key
   */
  public int getNumValues() {
    return numValues_;
  }

  /**
   * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode).
   * This is true if theta < 1.0 AND isEmpty() is false.
   * @return true if the sketch is in estimation mode.
   */
  public boolean isEstimationMode() {
    return ((thetaLong_ < Long.MAX_VALUE) && !isEmpty());
  }

  /**
   * Gets the value of theta as a double between zero and one
   * @return the value of theta as a double
   */
  public double getTheta() {
    return getThetaLong() / (double) Long.MAX_VALUE;
  }

  /**
   * @return number of retained entries
   */
  public abstract int getRetainedEntries();

  /**
   * @return the maximum number of bytes for this sketch when serialized.
   */
  public abstract int getMaxBytes();

  /**
   * For compact sketches this is the same as getMaxBytes().
   * @return the current number of bytes for this sketch when serialized.
   */
  public abstract int getCurrentBytes();

  /**
   * @return serialized representation of the sketch
   */
  public abstract byte[] toByteArray();

  /**
   * @return array of arrays of double values in the sketch
   */
  public abstract double[][] getValues();

  abstract double[] getValuesAsOneDimension();

  abstract long[] getKeys();

  /**
   * @return the value of theta as a long
   */
  long getThetaLong() {
    return isEmpty() ? Long.MAX_VALUE : thetaLong_;
  }

  abstract short getSeedHash();

  /**
   * @return iterator over the sketch
   */
  public abstract ArrayOfDoublesSketchIterator iterator();

  /**
   * @return this sketch in compact form, which is immutable.
   */
  public ArrayOfDoublesCompactSketch compact() {
    return compact(null);
  }

  /**
   *
   * @param dstMem the destination WritableMemory
   * @return this sketch in compact form, which is immutable.
   */
  public abstract ArrayOfDoublesCompactSketch compact(WritableMemory dstMem);

  @Override
  public String toString() {
    final int seedHash = Short.toUnsignedInt(getSeedHash());
    final StringBuilder sb = new StringBuilder();
    sb.append("### ").append(this.getClass().getSimpleName()).append(" SUMMARY: ").append(LS);
    sb.append("   Estimate                : ").append(getEstimate()).append(LS);
    sb.append("   Upper Bound, 95% conf   : ").append(getUpperBound(2)).append(LS);
    sb.append("   Lower Bound, 95% conf   : ").append(getLowerBound(2)).append(LS);
    sb.append("   Theta (double)          : ").append(getTheta()).append(LS);
    sb.append("   Theta (long)            : ").append(getThetaLong()).append(LS);
    sb.append("   EstMode?                : ").append(isEstimationMode()).append(LS);
    sb.append("   Empty?                  : ").append(isEmpty()).append(LS);
    sb.append("   Retained Entries        : ").append(getRetainedEntries()).append(LS);
    if (this instanceof ArrayOfDoublesUpdatableSketch) {
      final ArrayOfDoublesUpdatableSketch updatable = (ArrayOfDoublesUpdatableSketch) this;
      sb.append("   Nominal Entries (k)     : ").append(updatable.getNominalEntries()).append(LS);
      sb.append("   Current Capacity        : ").append(updatable.getCurrentCapacity()).append(LS);
      sb.append("   Resize Factor           : ").append(updatable.getResizeFactor().getValue()).append(LS);
      sb.append("   Sampling Probability (p): ").append(updatable.getSamplingProbability()).append(LS);
    }
    sb.append("   Seed Hash               : ")
      .append(Integer.toHexString(seedHash)).append(" | ").append(seedHash).append(LS);
    sb.append("### END SKETCH SUMMARY").append(LS);
    return sb.toString();
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy