com.yahoo.sketches.quantiles.DoublesUnionImpl Maven / Gradle / Ivy
/*
* Copyright 2016, Yahoo! Inc. Licensed under the terms of the
* Apache License 2.0. See LICENSE file at the project root for terms.
*/
package com.yahoo.sketches.quantiles;
import static com.yahoo.sketches.Util.LS;
import static com.yahoo.sketches.quantiles.DoublesUtil.copyToHeap;
import com.yahoo.memory.Memory;
/**
* Union operation for on-heap.
*
* @author Lee Rhodes
* @author Kevin Lang
*/
final class DoublesUnionImpl extends DoublesUnion {
private int maxK_;
private UpdateDoublesSketch gadget_ = null;
private DoublesUnionImpl(final int maxK) {
maxK_ = maxK;
}
/**
* Returns a empty Heap DoublesUnion object.
* @param maxK determines the accuracy and size of the union and is a maximum value.
* The effective k can be smaller due to unions with smaller k sketches.
* It is recommended that maxK be a power of 2 to enable unioning of sketches with
* different values of k.
*/
static DoublesUnionImpl heapInstance(final int maxK) {
final DoublesUnionImpl union = new DoublesUnionImpl(maxK);
return union;
}
/**
* Returns a empty DoublesUnion object that refers to the given direct, off-heap Memory,
* which will be initialized to the empty state.
*
* @param maxK determines the accuracy and size of the union and is a maximum value.
* The effective k can be smaller due to unions with smaller k sketches.
* It is recommended that maxK be a power of 2 to enable unioning of sketches with
* different values of k.
* @param dstMem the Memory to be used by the sketch
* @return a DoublesUnion object
*/
static DoublesUnionImpl directInstance(final int maxK, final Memory dstMem) {
final DirectUpdateDoublesSketch sketch = DirectUpdateDoublesSketch.newInstance(maxK, dstMem);
final DoublesUnionImpl union = new DoublesUnionImpl(maxK);
union.maxK_ = maxK;
union.gadget_ = sketch;
return union;
}
/**
* Returns a Heap DoublesUnion object that has been initialized with the data from the given
* sketch.
*
* @param sketch A DoublesSketch to be used as a source of data only and will not be modified.
* @return a DoublesUnion object
*/
static DoublesUnionImpl heapifyInstance(final DoublesSketch sketch) {
final int k = sketch.getK();
final DoublesUnionImpl union = new DoublesUnionImpl(k);
union.maxK_ = k;
union.gadget_ = copyToHeap(sketch);
return union;
}
/**
* Returns a Heap DoublesUnion object that has been initialized with the data from the given
* Memory image of a DoublesSketch. The srcMem object will not be modified and a reference to
* it is not retained. The maxK of the resulting union will be that obtained from
* the sketch Memory image.
*
* @param srcMem a Memory image of a quantiles DoublesSketch
* @return a DoublesUnion object
*/
static DoublesUnionImpl heapifyInstance(final Memory srcMem) {
final long n = srcMem.getLong(PreambleUtil.N_LONG);
final int k = srcMem.getShort(PreambleUtil.K_SHORT) & 0xFFFF;
final HeapUpdateDoublesSketch sketch = (n == 0)
? HeapUpdateDoublesSketch.newInstance(k)
: HeapUpdateDoublesSketch.heapifyInstance(srcMem);
final DoublesUnionImpl union = new DoublesUnionImpl(k);
union.maxK_ = k;
union.gadget_ = sketch;
return union;
}
/**
* Returns a Union object that wraps off-heap data structure of the given memory image of
* a non-compact DoublesSketch. The data structures of the Union remain off-heap.
*
* @param mem A memory image of a non-compact DoublesSketch to be used as the data
* structure for the union and will be modified.
* @return a Union object
*/
static DoublesUnionImpl wrapInstance(final Memory mem) {
final DirectUpdateDoublesSketch sketch = DirectUpdateDoublesSketch.wrapInstance(mem);
final int k = sketch.getK();
final DoublesUnionImpl union = new DoublesUnionImpl(k);
union.maxK_ = k;
union.gadget_ = sketch;
return union;
}
@Override
public void update(final DoublesSketch sketchIn) {
gadget_ = updateLogic(maxK_, gadget_, sketchIn);
}
@Override
public void update(final Memory mem) {
gadget_ = updateLogic(maxK_, gadget_, HeapUpdateDoublesSketch.heapifyInstance(mem));
}
@Override
public void update(final double dataItem) {
if (gadget_ == null) {
gadget_ = HeapUpdateDoublesSketch.newInstance(maxK_);
}
gadget_.update(dataItem);
}
@Override
public DoublesSketch getResult() {
if (gadget_ == null) {
return HeapUpdateDoublesSketch.newInstance(maxK_);
}
return DoublesUtil.copyToHeap(gadget_); //can't have any externally owned handles.
}
@Override
public DoublesSketch getResultAndReset() {
if (gadget_ == null) { return null; } //Intentionally return null here for speed.
final DoublesSketch ds = gadget_;
gadget_ = null;
return ds;
}
@Override
public void reset() {
gadget_ = null;
}
// @Override //TODO
// public byte[] toByteArray() {
// if (gadget_ == null) {
// final HeapUpdateDoublesSketch sketch = HeapUpdateDoublesSketch.newInstance(maxK_);
// return DoublesByteArrayImpl.toByteArray(sketch, true, false);
// }
// return DoublesByteArrayImpl.toByteArray(gadget_, true, false);
// }
@Override
public boolean isEmpty() {
return (gadget_ == null) ? true : gadget_.isEmpty();
}
@Override
public boolean isDirect() {
return (gadget_ == null) ? false : gadget_.isDirect();
}
@Override
public int getMaxK() {
return maxK_;
}
@Override
public int getEffectiveK() {
return (gadget_ != null) ? gadget_.getK() : maxK_;
}
@Override
public String toString() {
return toString(true, false);
}
@Override
public String toString(final boolean sketchSummary, final boolean dataDetail) {
final StringBuilder sb = new StringBuilder();
final String thisSimpleName = this.getClass().getSimpleName();
final int maxK = this.getMaxK();
final String kStr = String.format("%,d", maxK);
sb.append(Util.LS).append("### Quantiles ").append(thisSimpleName).append(LS);
sb.append(" maxK : ").append(kStr);
if (gadget_ == null) {
sb.append(HeapUpdateDoublesSketch.newInstance(maxK_).toString());
return sb.toString();
}
sb.append(gadget_.toString(sketchSummary, dataDetail));
return sb.toString();
}
//@formatter:off
@SuppressWarnings("null")
static UpdateDoublesSketch updateLogic(final int myMaxK, final UpdateDoublesSketch myQS,
final DoublesSketch other) {
int sw1 = ((myQS == null) ? 0 : myQS.isEmpty() ? 4 : 8);
sw1 |= ((other == null) ? 0 : other.isEmpty() ? 1 : 2);
int outCase = 0; //0=null, 1=NOOP, 2=copy, 3=merge
switch (sw1) {
case 0: outCase = 0; break; //myQS = null, other = null ; return null
case 1: outCase = 4; break; //myQS = null, other = empty; create empty-heap(myMaxK)
case 2: outCase = 2; break; //myQS = null, other = valid; stream or downsample to myMaxK
case 4: outCase = 1; break; //myQS = empty, other = null ; no-op
case 5: outCase = 1; break; //myQS = empty, other = empty; no-op
case 6: outCase = 3; break; //myQS = empty, other = valid; merge
case 8: outCase = 1; break; //myQS = valid, other = null ; no-op
case 9: outCase = 1; break; //myQS = valid, other = empty: no-op
case 10: outCase = 3; break; //myQS = valid, other = valid; merge
//default: //This cannot happen and cannot be tested
}
UpdateDoublesSketch ret = null;
switch (outCase) {
case 0: ret = null; break; //return null
case 1: ret = myQS; break; //no-op
case 2: { //myQS = null, other = valid; stream or downsample to myMaxK
if (!other.isEstimationMode()) { //other is exact, stream items in
ret = HeapUpdateDoublesSketch.newInstance(myMaxK);
// exact mode, only need copy base buffer
final DoublesSketchAccessor otherAccessor = DoublesSketchAccessor.wrap(other);
for (int i = 0; i < otherAccessor.numItems(); ++i) {
ret.update(otherAccessor.get(i));
}
}
else { //myQS = null, other is est mode
ret = (myMaxK < other.getK())
? other.downSampleInternal(other, myMaxK, null) //null mem
: DoublesUtil.copyToHeap(other); //copy required because caller has handle
}
break;
}
case 3: { //myQS = empty/valid, other = valid; merge
if (!other.isEstimationMode()) { //other is exact, stream items in
ret = myQS;
// exact mode, only need copy base buffer
final DoublesSketchAccessor otherAccessor = DoublesSketchAccessor.wrap(other);
for (int i = 0; i < otherAccessor.numItems(); ++i) {
ret.update(otherAccessor.get(i));
}
}
else { //myQS = empty/valid, other = valid and in est mode
if (myQS.getK() <= other.getK()) { //I am smaller or equal, thus the target
DoublesMergeImpl.mergeInto(other, myQS);
ret = myQS;
}
else { //Bigger: myQS.getK() > other.getK(), must effectively downsize me or swap
if (myQS.isEmpty()) {
if (myQS.isDirect()) {
final Memory mem = myQS.getMemory(); //myQS is empty, ok to reconfigure
other.putMemory(mem, false); // not compact, but BB ordered
ret = DirectUpdateDoublesSketch.wrapInstance(mem);
} else { //myQS is empty and on heap
ret = DoublesUtil.copyToHeap(other);
}
}
else { //Not Empty: myQS has data, downsample to tmp
final UpdateDoublesSketch tmp = DoublesSketch.builder().build(other.getK());
DoublesMergeImpl.downSamplingMergeInto(myQS, tmp); //myData -> tmp
ret = (myQS.isDirect())
? DoublesSketch.builder().initMemory(myQS.getMemory()).build(other.getK())
: DoublesSketch.builder().build(other.getK());
DoublesMergeImpl.mergeInto(tmp, ret);
DoublesMergeImpl.mergeInto(other, ret);
}
}
}
break;
}
case 4: { //myQS = null, other = empty; create empty-heap(myMaxK)
ret = HeapUpdateDoublesSketch.newInstance(myMaxK);
break;
}
//default: //This cannot happen and cannot be tested
}
return ret;
}
//@formatter:on
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy