All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.datasketches.quantiles.HeapUpdateDoublesSketch Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.quantiles;

import static org.apache.datasketches.quantiles.PreambleUtil.COMPACT_FLAG_MASK;
import static org.apache.datasketches.quantiles.PreambleUtil.EMPTY_FLAG_MASK;
import static org.apache.datasketches.quantiles.PreambleUtil.MAX_DOUBLE;
import static org.apache.datasketches.quantiles.PreambleUtil.MIN_DOUBLE;
import static org.apache.datasketches.quantiles.PreambleUtil.extractFamilyID;
import static org.apache.datasketches.quantiles.PreambleUtil.extractFlags;
import static org.apache.datasketches.quantiles.PreambleUtil.extractK;
import static org.apache.datasketches.quantiles.PreambleUtil.extractN;
import static org.apache.datasketches.quantiles.PreambleUtil.extractPreLongs;
import static org.apache.datasketches.quantiles.PreambleUtil.extractSerVer;
import static org.apache.datasketches.quantiles.Util.computeBaseBufferItems;
import static org.apache.datasketches.quantiles.Util.computeBitPattern;
import static org.apache.datasketches.quantiles.Util.computeCombinedBufferItemCapacity;
import static org.apache.datasketches.quantiles.Util.computeRetainedItems;

import java.util.Arrays;

import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;

/**
 * Implements the DoublesSketch on the Java heap.
 *
 * @author Lee Rhodes
 * @author Jon Malkin
 */
final class HeapUpdateDoublesSketch extends UpdateDoublesSketch {
  static final int MIN_HEAP_DOUBLES_SER_VER = 1;

  /**
   * The smallest value ever seen in the stream.
   */
  private double minValue_;

  /**
   * The largest value ever seen in the stream.
   */
  private double maxValue_;

  /**
   * The total count of items seen.
   */
  private long n_;

  /**
   * Number of samples currently in base buffer.
   *
   * 

Count = N % (2*K) */ private int baseBufferCount_; /** * Active levels expressed as a bit pattern. * *

Pattern = N / (2 * K) */ private long bitPattern_; /** * This single array contains the base buffer plus all levels some of which may not be used, * i.e, is in non-compact form. * A level is of size K and is either full and sorted, or not used. A "not used" buffer may have * garbage. Whether a level buffer used or not is indicated by the bitPattern_. * The base buffer has length 2*K but might not be full and isn't necessarily sorted. * The base buffer precedes the level buffers. This buffer does not include the min, max values. * *

The levels arrays require quite a bit of explanation, which we defer until later.

*/ private double[] combinedBuffer_; //**CONSTRUCTORS********************************************************** private HeapUpdateDoublesSketch(final int k) { super(k); //Checks k } /** * Obtains a new on-heap instance of a DoublesSketch. * * @param k Parameter that controls space usage of sketch and accuracy of estimates. * Must be greater than 1 and less than 65536 and a power of 2. * @return a HeapUpdateDoublesSketch */ static HeapUpdateDoublesSketch newInstance(final int k) { final HeapUpdateDoublesSketch hqs = new HeapUpdateDoublesSketch(k); final int baseBufAlloc = 2 * Math.min(DoublesSketch.MIN_K, k); //the min is important hqs.n_ = 0; hqs.combinedBuffer_ = new double[baseBufAlloc]; hqs.baseBufferCount_ = 0; hqs.bitPattern_ = 0; hqs.minValue_ = Double.NaN; hqs.maxValue_ = Double.NaN; return hqs; } /** * Heapifies the given srcMem, which must be a Memory image of a DoublesSketch and may have data. * * @param srcMem a Memory image of a sketch, which may be in compact or not compact form. * See Memory * @return a DoublesSketch on the Java heap. */ static HeapUpdateDoublesSketch heapifyInstance(final Memory srcMem) { final long memCapBytes = srcMem.getCapacity(); if (memCapBytes < 8) { throw new SketchesArgumentException("Source Memory too small: " + memCapBytes + " < 8"); } final int preLongs = extractPreLongs(srcMem); final int serVer = extractSerVer(srcMem); final int familyID = extractFamilyID(srcMem); final int flags = extractFlags(srcMem); final int k = extractK(srcMem); final boolean empty = (flags & EMPTY_FLAG_MASK) > 0; //Preamble flags empty state final long n = empty ? 0 : extractN(srcMem); //VALIDITY CHECKS DoublesUtil.checkDoublesSerVer(serVer, MIN_HEAP_DOUBLES_SER_VER); Util.checkHeapFlags(flags); checkPreLongsFlagsSerVer(flags, serVer, preLongs); Util.checkFamilyID(familyID); final HeapUpdateDoublesSketch hds = newInstance(k); //checks k if (empty) { return hds; } //Not empty, must have valid preamble + min, max, n. //Forward compatibility from SerVer = 1 : final boolean srcIsCompact = (serVer == 2) | ((flags & COMPACT_FLAG_MASK) > 0); checkHeapMemCapacity(k, n, srcIsCompact, serVer, memCapBytes); //set class members by computing them hds.n_ = n; final int combBufCap = computeCombinedBufferItemCapacity(k, n); hds.baseBufferCount_ = computeBaseBufferItems(k, n); hds.bitPattern_ = computeBitPattern(k, n); //Extract min, max, data from srcMem into Combined Buffer hds.srcMemoryToCombinedBuffer(srcMem, serVer, srcIsCompact, combBufCap); return hds; } @Override public double getMaxValue() { return maxValue_; } @Override public double getMinValue() { return minValue_; } @Override public long getN() { return n_; } @Override public boolean isDirect() { return false; } @Override public void reset() { n_ = 0; final int combinedBufferItemCapacity = 2 * Math.min(DoublesSketch.MIN_K, k_); //min is important combinedBuffer_ = new double[combinedBufferItemCapacity]; baseBufferCount_ = 0; bitPattern_ = 0; minValue_ = Double.NaN; maxValue_ = Double.NaN; } @Override public void update(final double dataItem) { if (Double.isNaN(dataItem)) { return; } if (n_ == 0) { putMaxValue(dataItem); putMinValue(dataItem); } else { if (dataItem > getMaxValue()) { putMaxValue(dataItem); } if (dataItem < getMinValue()) { putMinValue(dataItem); } } //don't increment n_ and baseBufferCount_ yet final int curBBCount = baseBufferCount_; final int newBBCount = curBBCount + 1; final long newN = n_ + 1; final int combBufItemCap = combinedBuffer_.length; if (newBBCount > combBufItemCap) { growBaseBuffer(); //only changes combinedBuffer when it is only a base buffer } //put the new item in the base buffer combinedBuffer_[curBBCount] = dataItem; if (newBBCount == (k_ << 1)) { //Propagate // make sure there will be enough space (levels) for the propagation final int spaceNeeded = DoublesUpdateImpl.getRequiredItemCapacity(k_, newN); if (spaceNeeded > combBufItemCap) { // copies base buffer plus old levels, adds space for new level growCombinedBuffer(combBufItemCap, spaceNeeded); } // sort only the (full) base buffer via accessor which modifies the underlying base buffer, // then use as one of the inputs to propagate-carry final DoublesSketchAccessor bbAccessor = DoublesSketchAccessor.wrap(this, true); bbAccessor.sort(); final long newBitPattern = DoublesUpdateImpl.inPlacePropagateCarry( 0, // starting level null, bbAccessor, true, k_, DoublesSketchAccessor.wrap(this, true), bitPattern_ ); assert newBitPattern == computeBitPattern(k_, newN); // internal consistency check assert newBitPattern == (bitPattern_ + 1); bitPattern_ = newBitPattern; baseBufferCount_ = 0; } else { //bitPattern unchanged baseBufferCount_ = newBBCount; } n_ = newN; } /** * Loads the Combined Buffer, min and max from the given source Memory. * The resulting Combined Buffer is always in non-compact form and must be pre-allocated. * @param srcMem the given source Memory * @param serVer the serialization version of the source * @param srcIsCompact true if the given source Memory is in compact form * @param combBufCap total items for the combined buffer (size in doubles) */ private void srcMemoryToCombinedBuffer(final Memory srcMem, final int serVer, final boolean srcIsCompact, final int combBufCap) { final int preLongs = 2; final int extra = (serVer == 1) ? 3 : 2; // space for min and max values, buf alloc (SerVer 1) final int preBytes = (preLongs + extra) << 3; final int bbCnt = baseBufferCount_; final int k = getK(); final long n = getN(); final double[] combinedBuffer = new double[combBufCap]; //always non-compact //Load min, max putMinValue(srcMem.getDouble(MIN_DOUBLE)); putMaxValue(srcMem.getDouble(MAX_DOUBLE)); if (srcIsCompact) { //Load base buffer srcMem.getDoubleArray(preBytes, combinedBuffer, 0, bbCnt); //Load levels from compact srcMem long bitPattern = bitPattern_; if (bitPattern != 0) { long memOffset = preBytes + (bbCnt << 3); int combBufOffset = 2 * k; while (bitPattern != 0L) { if ((bitPattern & 1L) > 0L) { srcMem.getDoubleArray(memOffset, combinedBuffer, combBufOffset, k); memOffset += (k << 3); //bytes, increment compactly } combBufOffset += k; //doubles, increment every level bitPattern >>>= 1; } } } else { //srcMem not compact final int levels = Util.computeNumLevelsNeeded(k, n); final int totItems = (levels == 0) ? bbCnt : (2 + levels) * k; srcMem.getDoubleArray(preBytes, combinedBuffer, 0, totItems); } putCombinedBuffer(combinedBuffer); } //Restricted overrides //Gets @Override int getBaseBufferCount() { return baseBufferCount_; } @Override int getCombinedBufferItemCapacity() { return combinedBuffer_.length; } @Override double[] getCombinedBuffer() { return combinedBuffer_; } @Override long getBitPattern() { return bitPattern_; } @Override WritableMemory getMemory() { return null; } //Puts @Override void putMinValue(final double minValue) { minValue_ = minValue; } @Override void putMaxValue(final double maxValue) { maxValue_ = maxValue; } @Override void putN(final long n) { n_ = n; } @Override void putCombinedBuffer(final double[] combinedBuffer) { combinedBuffer_ = combinedBuffer; } @Override void putBaseBufferCount(final int baseBufferCount) { baseBufferCount_ = baseBufferCount; } @Override void putBitPattern(final long bitPattern) { bitPattern_ = bitPattern; } @Override //the return value is not always used double[] growCombinedBuffer(final int currentSpace, final int spaceNeeded) { combinedBuffer_ = Arrays.copyOf(combinedBuffer_, spaceNeeded); return combinedBuffer_; } /** * This is only used for on-heap sketches, and grows the Base Buffer by factors of 2 until it * reaches the maximum size of 2 * k. It is only called when there are no levels above the * Base Buffer. */ //important: n has not been incremented yet private void growBaseBuffer() { final int oldSize = combinedBuffer_.length; assert oldSize < (2 * k_); final double[] baseBuffer = combinedBuffer_; final int newSize = 2 * Math.max(Math.min(k_, oldSize), DoublesSketch.MIN_K); combinedBuffer_ = Arrays.copyOf(baseBuffer, newSize); } static void checkPreLongsFlagsSerVer(final int flags, final int serVer, final int preLongs) { final boolean empty = (flags & EMPTY_FLAG_MASK) > 0; final boolean compact = (flags & COMPACT_FLAG_MASK) > 0; final int sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0)) + (4 * (serVer & 0xF)) + (32 * (preLongs & 0x3F)); boolean valid = true; switch (sw) { //These are the valid cases. case 38 : break; //!compact, empty, serVer = 1, preLongs = 1; always stored as not compact case 164 : break; //!compact, !empty, serVer = 1, preLongs = 5; always stored as not compact case 42 : break; //!compact, empty, serVer = 2, preLongs = 1; always stored as compact case 72 : break; //!compact, !empty, serVer = 2, preLongs = 2; always stored as compact case 47 : break; // compact, empty, serVer = 3, preLongs = 1; case 46 : break; //!compact, empty, serVer = 3, preLongs = 1; case 79 : break; // compact, empty, serVer = 3, preLongs = 2; case 78 : break; //!compact, empty, serVer = 3, preLongs = 2; case 77 : break; // compact, !empty, serVer = 3, preLongs = 2; case 76 : break; //!compact, !empty, serVer = 3, preLongs = 2; default : //all other case values are invalid valid = false; } if (!valid) { throw new SketchesArgumentException("Possible corruption. Inconsistent state: " + "PreambleLongs = " + preLongs + ", empty = " + empty + ", SerVer = " + serVer + ", Compact = " + compact); } } /** * Checks the validity of the heap memory capacity assuming n, k and the compact state. * @param k the given value of k * @param n the given value of n * @param compact true if memory is in compact form * @param serVer serialization version of the source * @param memCapBytes the current memory capacity in bytes */ static void checkHeapMemCapacity(final int k, final long n, final boolean compact, final int serVer, final long memCapBytes) { final int metaPre = Family.QUANTILES.getMaxPreLongs() + ((serVer == 1) ? 3 : 2); final int retainedItems = computeRetainedItems(k, n); final int reqBufBytes; if (compact) { reqBufBytes = (metaPre + retainedItems) << 3; } else { //not compact final int totLevels = Util.computeNumLevelsNeeded(k, n); reqBufBytes = (totLevels == 0) ? (metaPre + retainedItems) << 3 : (metaPre + ((2 + totLevels) * k)) << 3; } if (memCapBytes < reqBufBytes) { throw new SketchesArgumentException("Possible corruption: Memory capacity too small: " + memCapBytes + " < " + reqBufBytes); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy