All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.sketches.tuple.ArrayOfDoublesUnion Maven / Gradle / Ivy

There is a newer version: 0.6.0
Show newest version
/*
 * Copyright 2015-16, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.tuple;

import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;

import com.yahoo.memory.Memory;
import com.yahoo.memory.WritableMemory;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.SketchesArgumentException;

/**
 * The base class for unions of tuple sketches of type ArrayOfDoubles.
 */
public abstract class ArrayOfDoublesUnion {

  static final byte serialVersionUID = 1;

  static final int PREAMBLE_SIZE_BYTES = 16;
  // Layout of first 16 bytes:
  // Long || Start Byte Adr:
  // Adr:
  //      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
  //  0   ||    Seed Hash    | #Dbls  |  Flags | SkType | FamID  | SerVer |  Preamble_Longs    |
  //      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
  //  1   ||------------------------------Theta Long-------------------------------------------|

  static final int PREAMBLE_LONGS_BYTE = 0; // not used, always 1
  static final int SERIAL_VERSION_BYTE = 1;
  static final int FAMILY_ID_BYTE = 2;
  static final int SKETCH_TYPE_BYTE = 3;
  static final int FLAGS_BYTE = 4;
  static final int NUM_VALUES_BYTE = 5;
  static final int SEED_HASH_SHORT = 6;
  static final int THETA_LONG = 8;

  final int nomEntries_;
  final int numValues_;
  final long seed_;
  final short seedHash_;
  ArrayOfDoublesQuickSelectSketch sketch_;
  long theta_;

  ArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch sketch) {
    nomEntries_ = sketch.getNominalEntries();
    numValues_ = sketch.getNumValues();
    seed_ = sketch.getSeed();
    seedHash_ = Util.computeSeedHash(seed_);
    sketch_ = sketch;
    theta_ = sketch.getThetaLong();
  }

  /**
   * Heapify the given Memory as an ArrayOfDoublesUnion
   * @param mem the given Memory
   * @return an ArrayOfDoublesUnion
   */
  public static ArrayOfDoublesUnion heapify(final Memory mem) {
    return heapify(mem, DEFAULT_UPDATE_SEED);
  }

  /**
   * Heapify the given Memory and seed as an ArrayOfDoublesUnion
   * @param mem the given Memory
   * @param seed the given seed
   * @return an ArrayOfDoublesUnion
   */
  public static ArrayOfDoublesUnion heapify(final Memory mem, final long seed) {
    return HeapArrayOfDoublesUnion.heapifyUnion(mem, seed);
  }

  /**
   * Wrap the given Memory as an ArrayOfDoublesUnion
   * @param mem the given Memory
   * @return an ArrayOfDoublesUnion
   */
  public static ArrayOfDoublesUnion wrap(final Memory mem) {
    return wrap(mem, DEFAULT_UPDATE_SEED);
  }

  /**
   * Wrap the given Memory and seed as an ArrayOfDoublesUnion
   * @param mem the given Memory
   * @param seed the given seed
   * @return an ArrayOfDoublesUnion
   */
  public static ArrayOfDoublesUnion wrap(final Memory mem, final long seed) {
    return wrapUnionImpl((WritableMemory) mem, seed, false);
  }

  /**
   * Wrap the given WritableMemory as an ArrayOfDoublesUnion
   * @param mem the given Memory
   * @return an ArrayOfDoublesUnion
   */
  public static ArrayOfDoublesUnion wrap(final WritableMemory mem) {
    return wrap(mem, DEFAULT_UPDATE_SEED);
  }

  /**
   * Wrap the given WritableMemory and seed as an ArrayOfDoublesUnion
   * @param mem the given Memory
   * @param seed the given seed
   * @return an ArrayOfDoublesUnion
   */
  public static ArrayOfDoublesUnion wrap(final WritableMemory mem, final long seed) {
    return wrapUnionImpl(mem, seed, true);
  }

  /**
   * Updates the union by adding a set of entries from a given sketch
   * @param sketchIn sketch to add to the union
   */
  public void update(final ArrayOfDoublesSketch sketchIn) {
    if (sketchIn == null) { return; }
    Util.checkSeedHashes(seedHash_, sketchIn.getSeedHash());
    if (sketch_.getNumValues() != sketchIn.getNumValues()) {
      throw new SketchesArgumentException("Incompatible sketches: number of values mismatch "
          + sketch_.getNumValues() + " and " + sketchIn.getNumValues());
    }
    if (sketchIn.isEmpty()) { return; }
    if (sketchIn.getThetaLong() < theta_) { theta_ = sketchIn.getThetaLong(); }
    final ArrayOfDoublesSketchIterator it = sketchIn.iterator();
    while (it.next()) {
      sketch_.merge(it.getKey(), it.getValues());
    }
  }

  /**
   * Returns the resulting union in the form of a compact sketch
   * @param dstMem memory for the result (can be null)
   * @return compact sketch representing the union (off-heap if memory is provided)
   */
  public ArrayOfDoublesCompactSketch getResult(final WritableMemory dstMem) {
    if (sketch_.getRetainedEntries() > sketch_.getNominalEntries()) {
      theta_ = Math.min(theta_, sketch_.getNewTheta());
    }
    if (dstMem == null) {
      return new HeapArrayOfDoublesCompactSketch(sketch_, theta_);
    }
    return new DirectArrayOfDoublesCompactSketch(sketch_, theta_, dstMem);
  }

  /**
   * Returns the resulting union in the form of a compact sketch
   * @return on-heap compact sketch representing the union
   */
  public ArrayOfDoublesCompactSketch getResult() {
    return getResult(null);
  }

  /**
   * Resets the union to an empty state
   */
  public abstract void reset();

  /**
   * @return a byte array representation of this object
   */
  public byte[] toByteArray() {
    final int sizeBytes = PREAMBLE_SIZE_BYTES + sketch_.getSerializedSizeBytes();
    final byte[] byteArray = new byte[sizeBytes];
    final WritableMemory mem = WritableMemory.wrap(byteArray);
    mem.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1
    mem.putByte(SERIAL_VERSION_BYTE, serialVersionUID);
    mem.putByte(FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
    mem.putByte(SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal());
    mem.putLong(THETA_LONG, theta_);
    sketch_.serializeInto(mem.writableRegion(PREAMBLE_SIZE_BYTES, mem.getCapacity() - PREAMBLE_SIZE_BYTES));
    return byteArray;
  }

  /**
   * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
   * given value.
   * @param numValues Number of double values to keep for each key
   * @return maximum required storage bytes given nomEntries and numValues
   */
  public static int getMaxBytes(final int nomEntries, final int numValues) {
    return ArrayOfDoublesQuickSelectSketch.getMaxBytes(nomEntries, numValues);
  }

  void setThetaLong(final long theta) {
    theta_ = theta;
  }

  static ArrayOfDoublesUnion wrapUnionImpl(final WritableMemory mem, final long seed,
      final boolean isWritable) {
    final SerializerDeserializer.SketchType type = SerializerDeserializer.getSketchType(mem);
    final ArrayOfDoublesQuickSelectSketch sketch;
    final ArrayOfDoublesUnion union;

    // compatibility with version 0.9.1 and lower
    if (type == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) {
      if (isWritable) {
        sketch = new DirectArrayOfDoublesQuickSelectSketch(mem, seed);
        union = new DirectArrayOfDoublesUnion(sketch, mem);
      } else {
        sketch = new DirectArrayOfDoublesQuickSelectSketchR(mem, seed);
        union = new DirectArrayOfDoublesUnionR(sketch, mem);
      }
      return union; //Do not need to set theta_
    }
    //versions > 0.9.1

    //sanity checks
    final byte version = mem.getByte(ArrayOfDoublesUnion.SERIAL_VERSION_BYTE);
    if (version != ArrayOfDoublesUnion.serialVersionUID) {
      throw new SketchesArgumentException("Serial version mismatch. Expected: "
        + ArrayOfDoublesUnion.serialVersionUID + ", actual: " + version);
    }
    SerializerDeserializer.validateFamily(mem.getByte(ArrayOfDoublesUnion.FAMILY_ID_BYTE),
        mem.getByte(ArrayOfDoublesUnion.PREAMBLE_LONGS_BYTE));
    SerializerDeserializer.validateType(mem.getByte(ArrayOfDoublesUnion.SKETCH_TYPE_BYTE),
        SerializerDeserializer.SketchType.ArrayOfDoublesUnion);

    if (isWritable) {
      final WritableMemory sketchMem = mem.writableRegion(ArrayOfDoublesUnion.PREAMBLE_SIZE_BYTES,
          mem.getCapacity() - ArrayOfDoublesUnion.PREAMBLE_SIZE_BYTES);
      sketch = new DirectArrayOfDoublesQuickSelectSketch(sketchMem, seed);
      union = new DirectArrayOfDoublesUnion(sketch, mem);

    } else {
      final Memory sketchMem = mem.region(ArrayOfDoublesUnion.PREAMBLE_SIZE_BYTES,
          mem.getCapacity() - ArrayOfDoublesUnion.PREAMBLE_SIZE_BYTES);
      sketch = new DirectArrayOfDoublesQuickSelectSketchR(sketchMem, seed);
      union = new DirectArrayOfDoublesUnionR(sketch, mem);
    }
    union.theta_ = mem.getLong(ArrayOfDoublesUnion.THETA_LONG);
    return union;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy